| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 6736, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009501187648456057, |
| "grad_norm": 54.75, |
| "learning_rate": 9.980997624703087e-07, |
| "logits/chosen": 0.09168118238449097, |
| "logits/rejected": 0.12129797041416168, |
| "logps/chosen": -36.49408721923828, |
| "logps/rejected": -49.25672149658203, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": 0.008192603476345539, |
| "rewards/margins": 0.0024515376426279545, |
| "rewards/rejected": 0.005741065833717585, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.019002375296912115, |
| "grad_norm": 59.75, |
| "learning_rate": 9.961995249406174e-07, |
| "logits/chosen": 0.08897414803504944, |
| "logits/rejected": 0.1399449110031128, |
| "logps/chosen": -37.52133560180664, |
| "logps/rejected": -51.12624740600586, |
| "loss": 0.6829, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": 0.017915938049554825, |
| "rewards/margins": 0.021237602457404137, |
| "rewards/rejected": -0.003321664407849312, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.028503562945368172, |
| "grad_norm": 63.75, |
| "learning_rate": 9.942992874109262e-07, |
| "logits/chosen": 0.09679778665304184, |
| "logits/rejected": 0.16931985318660736, |
| "logps/chosen": -36.65215301513672, |
| "logps/rejected": -49.573421478271484, |
| "loss": 0.6798, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.03151966631412506, |
| "rewards/margins": 0.027938464656472206, |
| "rewards/rejected": 0.003581203753128648, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03800475059382423, |
| "grad_norm": 63.25, |
| "learning_rate": 9.923990498812351e-07, |
| "logits/chosen": 0.16125047206878662, |
| "logits/rejected": 0.24480265378952026, |
| "logps/chosen": -38.162559509277344, |
| "logps/rejected": -52.15303039550781, |
| "loss": 0.6703, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": 0.03711671382188797, |
| "rewards/margins": 0.047640666365623474, |
| "rewards/rejected": -0.010523954406380653, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.047505938242280284, |
| "grad_norm": 64.5, |
| "learning_rate": 9.904988123515439e-07, |
| "logits/chosen": 0.06547170132398605, |
| "logits/rejected": 0.13260145485401154, |
| "logps/chosen": -36.432579040527344, |
| "logps/rejected": -48.84739303588867, |
| "loss": 0.6626, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": 0.0562339723110199, |
| "rewards/margins": 0.0634559616446495, |
| "rewards/rejected": -0.00722198560833931, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.057007125890736345, |
| "grad_norm": 55.25, |
| "learning_rate": 9.885985748218526e-07, |
| "logits/chosen": 0.07628384232521057, |
| "logits/rejected": 0.13675282895565033, |
| "logps/chosen": -35.15069580078125, |
| "logps/rejected": -47.46355438232422, |
| "loss": 0.6585, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": 0.06429558247327805, |
| "rewards/margins": 0.0723925530910492, |
| "rewards/rejected": -0.008096965961158276, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0665083135391924, |
| "grad_norm": 50.75, |
| "learning_rate": 9.866983372921614e-07, |
| "logits/chosen": 0.11520403623580933, |
| "logits/rejected": 0.16309119760990143, |
| "logps/chosen": -36.81483840942383, |
| "logps/rejected": -50.9425163269043, |
| "loss": 0.6533, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": 0.06526956707239151, |
| "rewards/margins": 0.08356323093175888, |
| "rewards/rejected": -0.018293654546141624, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07600950118764846, |
| "grad_norm": 70.5, |
| "learning_rate": 9.847980997624703e-07, |
| "logits/chosen": 0.08769215643405914, |
| "logits/rejected": 0.17430077493190765, |
| "logps/chosen": -37.91537094116211, |
| "logps/rejected": -52.815677642822266, |
| "loss": 0.6492, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": 0.058046698570251465, |
| "rewards/margins": 0.09332208335399628, |
| "rewards/rejected": -0.035275377333164215, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0855106888361045, |
| "grad_norm": 56.5, |
| "learning_rate": 9.82897862232779e-07, |
| "logits/chosen": 0.1254298835992813, |
| "logits/rejected": 0.1748633235692978, |
| "logps/chosen": -36.702362060546875, |
| "logps/rejected": -50.70151901245117, |
| "loss": 0.6422, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": 0.06117745488882065, |
| "rewards/margins": 0.10814622789621353, |
| "rewards/rejected": -0.04696878045797348, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.09501187648456057, |
| "grad_norm": 61.75, |
| "learning_rate": 9.809976247030878e-07, |
| "logits/chosen": 0.09889174997806549, |
| "logits/rejected": 0.09486458450555801, |
| "logps/chosen": -37.1884651184082, |
| "logps/rejected": -48.955718994140625, |
| "loss": 0.6406, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": 0.08231806755065918, |
| "rewards/margins": 0.11266255378723145, |
| "rewards/rejected": -0.030344482511281967, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.10451306413301663, |
| "grad_norm": 61.0, |
| "learning_rate": 9.790973871733966e-07, |
| "logits/chosen": 0.12300257384777069, |
| "logits/rejected": 0.1372351050376892, |
| "logps/chosen": -37.07274627685547, |
| "logps/rejected": -51.80104446411133, |
| "loss": 0.6329, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.07423266023397446, |
| "rewards/margins": 0.1288163661956787, |
| "rewards/rejected": -0.05458369851112366, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.11401425178147269, |
| "grad_norm": 56.25, |
| "learning_rate": 9.771971496437053e-07, |
| "logits/chosen": 0.14542920887470245, |
| "logits/rejected": 0.1965111494064331, |
| "logps/chosen": -35.992820739746094, |
| "logps/rejected": -49.162193298339844, |
| "loss": 0.6215, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": 0.09972672164440155, |
| "rewards/margins": 0.1545056849718094, |
| "rewards/rejected": -0.05477897450327873, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.12351543942992874, |
| "grad_norm": 73.5, |
| "learning_rate": 9.752969121140143e-07, |
| "logits/chosen": 0.09255525469779968, |
| "logits/rejected": 0.19868624210357666, |
| "logps/chosen": -35.94331741333008, |
| "logps/rejected": -51.20383071899414, |
| "loss": 0.6101, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.10126903653144836, |
| "rewards/margins": 0.17989249527454376, |
| "rewards/rejected": -0.0786234438419342, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.1330166270783848, |
| "grad_norm": 58.25, |
| "learning_rate": 9.73396674584323e-07, |
| "logits/chosen": 0.13323335349559784, |
| "logits/rejected": 0.2155267596244812, |
| "logps/chosen": -38.20836639404297, |
| "logps/rejected": -52.9131965637207, |
| "loss": 0.6177, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.05846577137708664, |
| "rewards/margins": 0.16329729557037354, |
| "rewards/rejected": -0.1048315167427063, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.14251781472684086, |
| "grad_norm": 54.75, |
| "learning_rate": 9.714964370546317e-07, |
| "logits/chosen": 0.09854070097208023, |
| "logits/rejected": 0.20600098371505737, |
| "logps/chosen": -35.198726654052734, |
| "logps/rejected": -50.93071365356445, |
| "loss": 0.6034, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": 0.0857614204287529, |
| "rewards/margins": 0.19499169290065765, |
| "rewards/rejected": -0.10923026502132416, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.15201900237529692, |
| "grad_norm": 60.25, |
| "learning_rate": 9.695961995249405e-07, |
| "logits/chosen": 0.11161897331476212, |
| "logits/rejected": 0.14389154314994812, |
| "logps/chosen": -37.637821197509766, |
| "logps/rejected": -51.35750198364258, |
| "loss": 0.6147, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": 0.03235001862049103, |
| "rewards/margins": 0.17306238412857056, |
| "rewards/rejected": -0.14071235060691833, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.16152019002375298, |
| "grad_norm": 61.5, |
| "learning_rate": 9.676959619952494e-07, |
| "logits/chosen": 0.045116275548934937, |
| "logits/rejected": 0.12899161875247955, |
| "logps/chosen": -36.590667724609375, |
| "logps/rejected": -52.80071258544922, |
| "loss": 0.6001, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": 0.04269418492913246, |
| "rewards/margins": 0.20594461262226105, |
| "rewards/rejected": -0.16325044631958008, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.171021377672209, |
| "grad_norm": 57.0, |
| "learning_rate": 9.657957244655582e-07, |
| "logits/chosen": 0.10446017980575562, |
| "logits/rejected": 0.1502765715122223, |
| "logps/chosen": -36.7647705078125, |
| "logps/rejected": -50.46417236328125, |
| "loss": 0.5972, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": 0.0390743762254715, |
| "rewards/margins": 0.21257053315639496, |
| "rewards/rejected": -0.17349614202976227, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.18052256532066507, |
| "grad_norm": 55.0, |
| "learning_rate": 9.63895486935867e-07, |
| "logits/chosen": 0.04691855609416962, |
| "logits/rejected": 0.17598745226860046, |
| "logps/chosen": -35.485595703125, |
| "logps/rejected": -51.60365676879883, |
| "loss": 0.582, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": 0.03419807553291321, |
| "rewards/margins": 0.2475091964006424, |
| "rewards/rejected": -0.2133111208677292, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.19002375296912113, |
| "grad_norm": 59.25, |
| "learning_rate": 9.619952494061757e-07, |
| "logits/chosen": 0.15173594653606415, |
| "logits/rejected": 0.22723865509033203, |
| "logps/chosen": -38.54058837890625, |
| "logps/rejected": -53.069339752197266, |
| "loss": 0.5939, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.0007697370601817966, |
| "rewards/margins": 0.22217878699302673, |
| "rewards/rejected": -0.22294853627681732, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1995249406175772, |
| "grad_norm": 54.5, |
| "learning_rate": 9.600950118764846e-07, |
| "logits/chosen": 0.13400091230869293, |
| "logits/rejected": 0.1886436641216278, |
| "logps/chosen": -37.70890808105469, |
| "logps/rejected": -53.3675651550293, |
| "loss": 0.5775, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.012358499690890312, |
| "rewards/margins": 0.26150980591773987, |
| "rewards/rejected": -0.2738683223724365, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.20902612826603326, |
| "grad_norm": 59.0, |
| "learning_rate": 9.581947743467934e-07, |
| "logits/chosen": 0.15047906339168549, |
| "logits/rejected": 0.2155291885137558, |
| "logps/chosen": -36.07099533081055, |
| "logps/rejected": -51.531551361083984, |
| "loss": 0.577, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": 0.0014424873515963554, |
| "rewards/margins": 0.26147347688674927, |
| "rewards/rejected": -0.26003098487854004, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.21852731591448932, |
| "grad_norm": 61.25, |
| "learning_rate": 9.562945368171021e-07, |
| "logits/chosen": 0.11939611285924911, |
| "logits/rejected": 0.20481155812740326, |
| "logps/chosen": -37.29627990722656, |
| "logps/rejected": -53.07925033569336, |
| "loss": 0.5697, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": 0.005910781677812338, |
| "rewards/margins": 0.28491517901420593, |
| "rewards/rejected": -0.27900439500808716, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.22802850356294538, |
| "grad_norm": 55.75, |
| "learning_rate": 9.543942992874109e-07, |
| "logits/chosen": 0.10844097286462784, |
| "logits/rejected": 0.21932217478752136, |
| "logps/chosen": -37.1666259765625, |
| "logps/rejected": -53.12284851074219, |
| "loss": 0.5754, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.007143110036849976, |
| "rewards/margins": 0.2737848162651062, |
| "rewards/rejected": -0.28092795610427856, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.2375296912114014, |
| "grad_norm": 54.75, |
| "learning_rate": 9.524940617577196e-07, |
| "logits/chosen": 0.10385459661483765, |
| "logits/rejected": 0.13318760693073273, |
| "logps/chosen": -35.50322341918945, |
| "logps/rejected": -50.462791442871094, |
| "loss": 0.5679, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.0004334240220487118, |
| "rewards/margins": 0.2884422540664673, |
| "rewards/rejected": -0.28800880908966064, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.24703087885985747, |
| "grad_norm": 54.25, |
| "learning_rate": 9.505938242280285e-07, |
| "logits/chosen": 0.08173692971467972, |
| "logits/rejected": 0.14498800039291382, |
| "logps/chosen": -35.70138931274414, |
| "logps/rejected": -54.012001037597656, |
| "loss": 0.5657, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.03053681179881096, |
| "rewards/margins": 0.29130610823631287, |
| "rewards/rejected": -0.3218429386615753, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.25653206650831356, |
| "grad_norm": 63.75, |
| "learning_rate": 9.486935866983372e-07, |
| "logits/chosen": 0.08363573253154755, |
| "logits/rejected": 0.18085773289203644, |
| "logps/chosen": -39.11247634887695, |
| "logps/rejected": -55.508888244628906, |
| "loss": 0.5693, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.05329626053571701, |
| "rewards/margins": 0.2859145998954773, |
| "rewards/rejected": -0.3392108976840973, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2660332541567696, |
| "grad_norm": 65.5, |
| "learning_rate": 9.467933491686461e-07, |
| "logits/chosen": 0.07687534391880035, |
| "logits/rejected": 0.2082725167274475, |
| "logps/chosen": -37.038570404052734, |
| "logps/rejected": -54.50634765625, |
| "loss": 0.5379, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.019455045461654663, |
| "rewards/margins": 0.36107519268989563, |
| "rewards/rejected": -0.3805302679538727, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2755344418052256, |
| "grad_norm": 63.25, |
| "learning_rate": 9.448931116389548e-07, |
| "logits/chosen": 0.10213326662778854, |
| "logits/rejected": 0.20626387000083923, |
| "logps/chosen": -37.784019470214844, |
| "logps/rejected": -54.457462310791016, |
| "loss": 0.5493, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -0.04002974182367325, |
| "rewards/margins": 0.3406682312488556, |
| "rewards/rejected": -0.380698025226593, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2850356294536817, |
| "grad_norm": 61.75, |
| "learning_rate": 9.429928741092636e-07, |
| "logits/chosen": 0.13567008078098297, |
| "logits/rejected": 0.24827228486537933, |
| "logps/chosen": -38.69795227050781, |
| "logps/rejected": -54.621788024902344, |
| "loss": 0.5563, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.040405262261629105, |
| "rewards/margins": 0.3285280168056488, |
| "rewards/rejected": -0.3689332604408264, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.29453681710213775, |
| "grad_norm": 58.0, |
| "learning_rate": 9.410926365795724e-07, |
| "logits/chosen": 0.10663773119449615, |
| "logits/rejected": 0.16733931005001068, |
| "logps/chosen": -38.08431625366211, |
| "logps/rejected": -54.35865020751953, |
| "loss": 0.5339, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.04221474006772041, |
| "rewards/margins": 0.37557515501976013, |
| "rewards/rejected": -0.41778990626335144, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.30403800475059384, |
| "grad_norm": 70.0, |
| "learning_rate": 9.391923990498812e-07, |
| "logits/chosen": 0.1082092821598053, |
| "logits/rejected": 0.23803919553756714, |
| "logps/chosen": -38.825382232666016, |
| "logps/rejected": -55.53346252441406, |
| "loss": 0.5598, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -0.09616291522979736, |
| "rewards/margins": 0.31281211972236633, |
| "rewards/rejected": -0.4089750051498413, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.31353919239904987, |
| "grad_norm": 50.25, |
| "learning_rate": 9.3729216152019e-07, |
| "logits/chosen": 0.07085791230201721, |
| "logits/rejected": 0.15105192363262177, |
| "logps/chosen": -39.11968994140625, |
| "logps/rejected": -55.4873046875, |
| "loss": 0.5426, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.10313617438077927, |
| "rewards/margins": 0.351255863904953, |
| "rewards/rejected": -0.45439204573631287, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.32304038004750596, |
| "grad_norm": 51.25, |
| "learning_rate": 9.353919239904988e-07, |
| "logits/chosen": 0.10827435553073883, |
| "logits/rejected": 0.17179493606090546, |
| "logps/chosen": -37.49391174316406, |
| "logps/rejected": -55.33747100830078, |
| "loss": 0.5326, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.05370311439037323, |
| "rewards/margins": 0.38469013571739197, |
| "rewards/rejected": -0.438393235206604, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.332541567695962, |
| "grad_norm": 65.5, |
| "learning_rate": 9.334916864608076e-07, |
| "logits/chosen": 0.14238286018371582, |
| "logits/rejected": 0.19145098328590393, |
| "logps/chosen": -37.788291931152344, |
| "logps/rejected": -51.97111892700195, |
| "loss": 0.553, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -0.08003263175487518, |
| "rewards/margins": 0.33586975932121277, |
| "rewards/rejected": -0.41590237617492676, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.342042755344418, |
| "grad_norm": 52.75, |
| "learning_rate": 9.315914489311163e-07, |
| "logits/chosen": 0.16290903091430664, |
| "logits/rejected": 0.1991579532623291, |
| "logps/chosen": -37.21266174316406, |
| "logps/rejected": -55.373538970947266, |
| "loss": 0.524, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.07777812331914902, |
| "rewards/margins": 0.40286707878112793, |
| "rewards/rejected": -0.48064517974853516, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3515439429928741, |
| "grad_norm": 51.75, |
| "learning_rate": 9.296912114014252e-07, |
| "logits/chosen": 0.08858010172843933, |
| "logits/rejected": 0.16938555240631104, |
| "logps/chosen": -37.08319091796875, |
| "logps/rejected": -53.559356689453125, |
| "loss": 0.5302, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.08191077411174774, |
| "rewards/margins": 0.3878284990787506, |
| "rewards/rejected": -0.46973931789398193, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.36104513064133015, |
| "grad_norm": 57.0, |
| "learning_rate": 9.277909738717339e-07, |
| "logits/chosen": 0.1242557018995285, |
| "logits/rejected": 0.12163582444190979, |
| "logps/chosen": -40.13254165649414, |
| "logps/rejected": -55.82794952392578, |
| "loss": 0.5285, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.10593372583389282, |
| "rewards/margins": 0.4058433473110199, |
| "rewards/rejected": -0.5117770433425903, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.37054631828978624, |
| "grad_norm": 54.25, |
| "learning_rate": 9.258907363420428e-07, |
| "logits/chosen": 0.12377818673849106, |
| "logits/rejected": 0.218975231051445, |
| "logps/chosen": -38.33209228515625, |
| "logps/rejected": -54.284889221191406, |
| "loss": 0.5255, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.10532137006521225, |
| "rewards/margins": 0.40542417764663696, |
| "rewards/rejected": -0.5107455253601074, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.38004750593824227, |
| "grad_norm": 61.25, |
| "learning_rate": 9.239904988123515e-07, |
| "logits/chosen": 0.14407610893249512, |
| "logits/rejected": 0.13527539372444153, |
| "logps/chosen": -39.09244918823242, |
| "logps/rejected": -56.88185501098633, |
| "loss": 0.5161, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.12786737084388733, |
| "rewards/margins": 0.4305534064769745, |
| "rewards/rejected": -0.5584207773208618, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.38954869358669836, |
| "grad_norm": 60.5, |
| "learning_rate": 9.220902612826604e-07, |
| "logits/chosen": 0.08770464360713959, |
| "logits/rejected": 0.1615772247314453, |
| "logps/chosen": -40.214927673339844, |
| "logps/rejected": -57.730621337890625, |
| "loss": 0.4933, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.10945181548595428, |
| "rewards/margins": 0.4860302805900574, |
| "rewards/rejected": -0.5954821109771729, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.3990498812351544, |
| "grad_norm": 49.5, |
| "learning_rate": 9.201900237529691e-07, |
| "logits/chosen": 0.1510995626449585, |
| "logits/rejected": 0.20847982168197632, |
| "logps/chosen": -37.78511428833008, |
| "logps/rejected": -55.67912292480469, |
| "loss": 0.4915, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.07964207231998444, |
| "rewards/margins": 0.5031406283378601, |
| "rewards/rejected": -0.5827827453613281, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.4085510688836104, |
| "grad_norm": 45.75, |
| "learning_rate": 9.18289786223278e-07, |
| "logits/chosen": 0.08455921709537506, |
| "logits/rejected": 0.19820302724838257, |
| "logps/chosen": -37.23653793334961, |
| "logps/rejected": -52.45553207397461, |
| "loss": 0.5195, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.10724642127752304, |
| "rewards/margins": 0.42769795656204224, |
| "rewards/rejected": -0.5349443554878235, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.4180522565320665, |
| "grad_norm": 49.75, |
| "learning_rate": 9.163895486935866e-07, |
| "logits/chosen": 0.0849796012043953, |
| "logits/rejected": 0.1387515664100647, |
| "logps/chosen": -36.640594482421875, |
| "logps/rejected": -56.54729080200195, |
| "loss": 0.4855, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.07652697712182999, |
| "rewards/margins": 0.5180978775024414, |
| "rewards/rejected": -0.5946248173713684, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.42755344418052255, |
| "grad_norm": 50.25, |
| "learning_rate": 9.144893111638954e-07, |
| "logits/chosen": 0.1335923671722412, |
| "logits/rejected": 0.12739677727222443, |
| "logps/chosen": -38.18653106689453, |
| "logps/rejected": -56.48445510864258, |
| "loss": 0.4906, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.10945376753807068, |
| "rewards/margins": 0.4993862211704254, |
| "rewards/rejected": -0.6088399887084961, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.43705463182897863, |
| "grad_norm": 47.75, |
| "learning_rate": 9.125890736342042e-07, |
| "logits/chosen": 0.05496565252542496, |
| "logits/rejected": 0.15505348145961761, |
| "logps/chosen": -38.44123077392578, |
| "logps/rejected": -58.174560546875, |
| "loss": 0.4734, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.1328837275505066, |
| "rewards/margins": 0.5390680432319641, |
| "rewards/rejected": -0.6719517707824707, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.44655581947743467, |
| "grad_norm": 51.0, |
| "learning_rate": 9.106888361045129e-07, |
| "logits/chosen": 0.17514106631278992, |
| "logits/rejected": 0.21084193885326385, |
| "logps/chosen": -39.44230270385742, |
| "logps/rejected": -55.33711624145508, |
| "loss": 0.4927, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.13082744181156158, |
| "rewards/margins": 0.4961455464363098, |
| "rewards/rejected": -0.626973032951355, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.45605700712589076, |
| "grad_norm": 53.5, |
| "learning_rate": 9.087885985748218e-07, |
| "logits/chosen": 0.05614431947469711, |
| "logits/rejected": 0.16487029194831848, |
| "logps/chosen": -38.37213897705078, |
| "logps/rejected": -56.517974853515625, |
| "loss": 0.4808, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.13590717315673828, |
| "rewards/margins": 0.5437510013580322, |
| "rewards/rejected": -0.6796582341194153, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.4655581947743468, |
| "grad_norm": 56.5, |
| "learning_rate": 9.068883610451305e-07, |
| "logits/chosen": 0.09643438458442688, |
| "logits/rejected": 0.1764623522758484, |
| "logps/chosen": -37.71235275268555, |
| "logps/rejected": -55.301570892333984, |
| "loss": 0.4736, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.10847848653793335, |
| "rewards/margins": 0.5602878928184509, |
| "rewards/rejected": -0.6687663793563843, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.4750593824228028, |
| "grad_norm": 50.5, |
| "learning_rate": 9.049881235154394e-07, |
| "logits/chosen": 0.10280097275972366, |
| "logits/rejected": 0.24883826076984406, |
| "logps/chosen": -37.97513198852539, |
| "logps/rejected": -55.975555419921875, |
| "loss": 0.4709, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.1473047137260437, |
| "rewards/margins": 0.5804386734962463, |
| "rewards/rejected": -0.72774338722229, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4845605700712589, |
| "grad_norm": 57.0, |
| "learning_rate": 9.030878859857481e-07, |
| "logits/chosen": 0.13920438289642334, |
| "logits/rejected": 0.2100549191236496, |
| "logps/chosen": -38.78316116333008, |
| "logps/rejected": -57.39037322998047, |
| "loss": 0.4858, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.19835878908634186, |
| "rewards/margins": 0.5184602737426758, |
| "rewards/rejected": -0.716819167137146, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.49406175771971494, |
| "grad_norm": 47.25, |
| "learning_rate": 9.01187648456057e-07, |
| "logits/chosen": 0.04185810685157776, |
| "logits/rejected": 0.16661542654037476, |
| "logps/chosen": -38.728607177734375, |
| "logps/rejected": -56.995845794677734, |
| "loss": 0.4705, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.18127599358558655, |
| "rewards/margins": 0.5616625547409058, |
| "rewards/rejected": -0.7429385185241699, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.503562945368171, |
| "grad_norm": 55.5, |
| "learning_rate": 8.992874109263657e-07, |
| "logits/chosen": 0.09122167527675629, |
| "logits/rejected": 0.22163383662700653, |
| "logps/chosen": -37.88745880126953, |
| "logps/rejected": -58.1086311340332, |
| "loss": 0.502, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.20832395553588867, |
| "rewards/margins": 0.4904058575630188, |
| "rewards/rejected": -0.6987298130989075, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.5130641330166271, |
| "grad_norm": 53.75, |
| "learning_rate": 8.973871733966746e-07, |
| "logits/chosen": 0.0702139213681221, |
| "logits/rejected": 0.1391507089138031, |
| "logps/chosen": -40.5086784362793, |
| "logps/rejected": -56.50019454956055, |
| "loss": 0.4854, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.26017051935195923, |
| "rewards/margins": 0.5463624000549316, |
| "rewards/rejected": -0.8065329194068909, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.5225653206650831, |
| "grad_norm": 46.0, |
| "learning_rate": 8.954869358669833e-07, |
| "logits/chosen": 0.1008288711309433, |
| "logits/rejected": 0.20593173801898956, |
| "logps/chosen": -38.77046585083008, |
| "logps/rejected": -56.287872314453125, |
| "loss": 0.4734, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.18773072957992554, |
| "rewards/margins": 0.567348301410675, |
| "rewards/rejected": -0.7550791501998901, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5320665083135392, |
| "grad_norm": 56.0, |
| "learning_rate": 8.935866983372922e-07, |
| "logits/chosen": 0.061835043132305145, |
| "logits/rejected": 0.16514989733695984, |
| "logps/chosen": -40.11195373535156, |
| "logps/rejected": -58.53046798706055, |
| "loss": 0.4531, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.1927071213722229, |
| "rewards/margins": 0.6243357062339783, |
| "rewards/rejected": -0.817042887210846, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5415676959619953, |
| "grad_norm": 49.5, |
| "learning_rate": 8.916864608076009e-07, |
| "logits/chosen": 0.14350858330726624, |
| "logits/rejected": 0.1577494591474533, |
| "logps/chosen": -39.235504150390625, |
| "logps/rejected": -58.71210861206055, |
| "loss": 0.4787, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.22734667360782623, |
| "rewards/margins": 0.5474963784217834, |
| "rewards/rejected": -0.7748430967330933, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5510688836104513, |
| "grad_norm": 38.25, |
| "learning_rate": 8.897862232779097e-07, |
| "logits/chosen": 0.07540839910507202, |
| "logits/rejected": 0.12306182831525803, |
| "logps/chosen": -37.44085693359375, |
| "logps/rejected": -57.793243408203125, |
| "loss": 0.4301, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": -0.15863735973834991, |
| "rewards/margins": 0.693757176399231, |
| "rewards/rejected": -0.8523945808410645, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.5605700712589073, |
| "grad_norm": 47.25, |
| "learning_rate": 8.878859857482185e-07, |
| "logits/chosen": 0.005182682536542416, |
| "logits/rejected": 0.1266525387763977, |
| "logps/chosen": -38.00994873046875, |
| "logps/rejected": -57.80335235595703, |
| "loss": 0.4626, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.16963782906532288, |
| "rewards/margins": 0.6042665839195251, |
| "rewards/rejected": -0.7739044427871704, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.5700712589073634, |
| "grad_norm": 55.5, |
| "learning_rate": 8.859857482185272e-07, |
| "logits/chosen": 0.11473742127418518, |
| "logits/rejected": 0.16739153861999512, |
| "logps/chosen": -41.33670425415039, |
| "logps/rejected": -59.52238082885742, |
| "loss": 0.4705, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.25219887495040894, |
| "rewards/margins": 0.5897648334503174, |
| "rewards/rejected": -0.8419637680053711, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5795724465558195, |
| "grad_norm": 46.5, |
| "learning_rate": 8.840855106888361e-07, |
| "logits/chosen": 0.03330547362565994, |
| "logits/rejected": 0.15450525283813477, |
| "logps/chosen": -38.50295639038086, |
| "logps/rejected": -55.91643142700195, |
| "loss": 0.4555, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.1985245943069458, |
| "rewards/margins": 0.6212279200553894, |
| "rewards/rejected": -0.8197525143623352, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.5890736342042755, |
| "grad_norm": 49.25, |
| "learning_rate": 8.821852731591448e-07, |
| "logits/chosen": 0.10139049589633942, |
| "logits/rejected": 0.19957150518894196, |
| "logps/chosen": -39.53318405151367, |
| "logps/rejected": -60.60167694091797, |
| "loss": 0.4703, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.23673467338085175, |
| "rewards/margins": 0.5786986351013184, |
| "rewards/rejected": -0.8154332637786865, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.5985748218527316, |
| "grad_norm": 42.5, |
| "learning_rate": 8.802850356294537e-07, |
| "logits/chosen": 0.08935532718896866, |
| "logits/rejected": 0.18748882412910461, |
| "logps/chosen": -39.9503059387207, |
| "logps/rejected": -58.329071044921875, |
| "loss": 0.4571, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.23434796929359436, |
| "rewards/margins": 0.6296550631523132, |
| "rewards/rejected": -0.86400306224823, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.6080760095011877, |
| "grad_norm": 48.0, |
| "learning_rate": 8.783847980997624e-07, |
| "logits/chosen": 0.12793917953968048, |
| "logits/rejected": 0.26207882165908813, |
| "logps/chosen": -40.79100036621094, |
| "logps/rejected": -63.207977294921875, |
| "loss": 0.4433, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.2365538626909256, |
| "rewards/margins": 0.6767521500587463, |
| "rewards/rejected": -0.9133059978485107, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.6175771971496437, |
| "grad_norm": 44.75, |
| "learning_rate": 8.764845605700713e-07, |
| "logits/chosen": 0.07575452327728271, |
| "logits/rejected": 0.13339047133922577, |
| "logps/chosen": -38.435760498046875, |
| "logps/rejected": -56.613590240478516, |
| "loss": 0.4691, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.20284534990787506, |
| "rewards/margins": 0.6009599566459656, |
| "rewards/rejected": -0.8038052916526794, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6270783847980997, |
| "grad_norm": 49.75, |
| "learning_rate": 8.7458432304038e-07, |
| "logits/chosen": 0.08708982169628143, |
| "logits/rejected": 0.1805320382118225, |
| "logps/chosen": -38.411643981933594, |
| "logps/rejected": -62.70646286010742, |
| "loss": 0.4134, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.2167506217956543, |
| "rewards/margins": 0.7745501399040222, |
| "rewards/rejected": -0.9913008213043213, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.6365795724465558, |
| "grad_norm": 74.5, |
| "learning_rate": 8.726840855106889e-07, |
| "logits/chosen": 0.10599172860383987, |
| "logits/rejected": 0.17720839381217957, |
| "logps/chosen": -41.43864059448242, |
| "logps/rejected": -60.35655212402344, |
| "loss": 0.4606, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.24390998482704163, |
| "rewards/margins": 0.6325302124023438, |
| "rewards/rejected": -0.8764402270317078, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.6460807600950119, |
| "grad_norm": 54.5, |
| "learning_rate": 8.707838479809976e-07, |
| "logits/chosen": 0.1116759404540062, |
| "logits/rejected": 0.12336824834346771, |
| "logps/chosen": -40.634971618652344, |
| "logps/rejected": -57.01872253417969, |
| "loss": 0.4681, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.251184344291687, |
| "rewards/margins": 0.6177032589912415, |
| "rewards/rejected": -0.8688876628875732, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6555819477434679, |
| "grad_norm": 40.0, |
| "learning_rate": 8.688836104513065e-07, |
| "logits/chosen": 0.05607762932777405, |
| "logits/rejected": 0.16543659567832947, |
| "logps/chosen": -40.00341796875, |
| "logps/rejected": -58.64830780029297, |
| "loss": 0.4405, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.2381763756275177, |
| "rewards/margins": 0.6710238456726074, |
| "rewards/rejected": -0.909200131893158, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.665083135391924, |
| "grad_norm": 50.25, |
| "learning_rate": 8.669833729216152e-07, |
| "logits/chosen": 0.049548353999853134, |
| "logits/rejected": 0.13943257927894592, |
| "logps/chosen": -39.979637145996094, |
| "logps/rejected": -59.84418487548828, |
| "loss": 0.4359, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.2502007484436035, |
| "rewards/margins": 0.7026790380477905, |
| "rewards/rejected": -0.9528796672821045, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6745843230403801, |
| "grad_norm": 46.5, |
| "learning_rate": 8.65083135391924e-07, |
| "logits/chosen": 0.08080411702394485, |
| "logits/rejected": 0.15538814663887024, |
| "logps/chosen": -37.723243713378906, |
| "logps/rejected": -58.846256256103516, |
| "loss": 0.422, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.18230988085269928, |
| "rewards/margins": 0.7491470575332642, |
| "rewards/rejected": -0.931456983089447, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.684085510688836, |
| "grad_norm": 38.5, |
| "learning_rate": 8.631828978622328e-07, |
| "logits/chosen": 0.10914994031190872, |
| "logits/rejected": 0.22802668809890747, |
| "logps/chosen": -40.43394088745117, |
| "logps/rejected": -58.36912536621094, |
| "loss": 0.4662, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.2760140001773834, |
| "rewards/margins": 0.613175094127655, |
| "rewards/rejected": -0.8891890048980713, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.6935866983372921, |
| "grad_norm": 50.0, |
| "learning_rate": 8.612826603325414e-07, |
| "logits/chosen": 0.09185300767421722, |
| "logits/rejected": 0.1087762862443924, |
| "logps/chosen": -38.46809768676758, |
| "logps/rejected": -57.867942810058594, |
| "loss": 0.4468, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.26649224758148193, |
| "rewards/margins": 0.6601444482803345, |
| "rewards/rejected": -0.9266366362571716, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.7030878859857482, |
| "grad_norm": 48.75, |
| "learning_rate": 8.593824228028503e-07, |
| "logits/chosen": 0.09283562749624252, |
| "logits/rejected": 0.1531636118888855, |
| "logps/chosen": -41.238792419433594, |
| "logps/rejected": -59.725738525390625, |
| "loss": 0.447, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.30539485812187195, |
| "rewards/margins": 0.671943187713623, |
| "rewards/rejected": -0.9773380160331726, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.7125890736342043, |
| "grad_norm": 54.25, |
| "learning_rate": 8.57482185273159e-07, |
| "logits/chosen": 0.17938324809074402, |
| "logits/rejected": 0.21232807636260986, |
| "logps/chosen": -39.86995315551758, |
| "logps/rejected": -58.8725471496582, |
| "loss": 0.4251, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.25202998518943787, |
| "rewards/margins": 0.7341017723083496, |
| "rewards/rejected": -0.9861317873001099, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7220902612826603, |
| "grad_norm": 54.25, |
| "learning_rate": 8.555819477434679e-07, |
| "logits/chosen": 0.0566461905837059, |
| "logits/rejected": 0.12666890025138855, |
| "logps/chosen": -39.986106872558594, |
| "logps/rejected": -61.40876388549805, |
| "loss": 0.4067, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.2511664927005768, |
| "rewards/margins": 0.7968044281005859, |
| "rewards/rejected": -1.0479708909988403, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.7315914489311164, |
| "grad_norm": 58.75, |
| "learning_rate": 8.536817102137766e-07, |
| "logits/chosen": 0.08908773213624954, |
| "logits/rejected": 0.13832062482833862, |
| "logps/chosen": -39.75481414794922, |
| "logps/rejected": -60.21009826660156, |
| "loss": 0.4281, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.24241387844085693, |
| "rewards/margins": 0.7212178707122803, |
| "rewards/rejected": -0.9636316895484924, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.7410926365795725, |
| "grad_norm": 52.5, |
| "learning_rate": 8.517814726840855e-07, |
| "logits/chosen": 0.0984007716178894, |
| "logits/rejected": 0.15883538126945496, |
| "logps/chosen": -38.90775680541992, |
| "logps/rejected": -59.53275680541992, |
| "loss": 0.4418, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.27383100986480713, |
| "rewards/margins": 0.7038523554801941, |
| "rewards/rejected": -0.9776833653450012, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.7505938242280285, |
| "grad_norm": 41.25, |
| "learning_rate": 8.498812351543942e-07, |
| "logits/chosen": 0.11719369888305664, |
| "logits/rejected": 0.17845718562602997, |
| "logps/chosen": -38.104705810546875, |
| "logps/rejected": -61.452178955078125, |
| "loss": 0.3903, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.20659206807613373, |
| "rewards/margins": 0.855985701084137, |
| "rewards/rejected": -1.062577724456787, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.7600950118764845, |
| "grad_norm": 45.75, |
| "learning_rate": 8.479809976247031e-07, |
| "logits/chosen": 0.0768904983997345, |
| "logits/rejected": 0.20530077815055847, |
| "logps/chosen": -39.765953063964844, |
| "logps/rejected": -61.358497619628906, |
| "loss": 0.4049, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.2901615798473358, |
| "rewards/margins": 0.8085931539535522, |
| "rewards/rejected": -1.098754644393921, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7695961995249406, |
| "grad_norm": 41.5, |
| "learning_rate": 8.460807600950118e-07, |
| "logits/chosen": 0.1819642186164856, |
| "logits/rejected": 0.20391146838665009, |
| "logps/chosen": -41.09254455566406, |
| "logps/rejected": -62.414024353027344, |
| "loss": 0.4161, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.2619169354438782, |
| "rewards/margins": 0.7796422243118286, |
| "rewards/rejected": -1.0415592193603516, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.7790973871733967, |
| "grad_norm": 45.75, |
| "learning_rate": 8.441805225653206e-07, |
| "logits/chosen": 0.07217932492494583, |
| "logits/rejected": 0.15043236315250397, |
| "logps/chosen": -41.25364685058594, |
| "logps/rejected": -59.63226318359375, |
| "loss": 0.4309, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3054697513580322, |
| "rewards/margins": 0.7218286395072937, |
| "rewards/rejected": -1.0272983312606812, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.7885985748218527, |
| "grad_norm": 62.75, |
| "learning_rate": 8.422802850356294e-07, |
| "logits/chosen": 0.10031426697969437, |
| "logits/rejected": 0.197869673371315, |
| "logps/chosen": -40.79019546508789, |
| "logps/rejected": -61.019493103027344, |
| "loss": 0.4362, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.2853235602378845, |
| "rewards/margins": 0.7121058106422424, |
| "rewards/rejected": -0.9974292516708374, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.7980997624703088, |
| "grad_norm": 54.0, |
| "learning_rate": 8.403800475059381e-07, |
| "logits/chosen": 0.044592004269361496, |
| "logits/rejected": 0.19066573679447174, |
| "logps/chosen": -40.317413330078125, |
| "logps/rejected": -58.56877899169922, |
| "loss": 0.4502, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.30221185088157654, |
| "rewards/margins": 0.6648804545402527, |
| "rewards/rejected": -0.9670923352241516, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.8076009501187649, |
| "grad_norm": 40.75, |
| "learning_rate": 8.38479809976247e-07, |
| "logits/chosen": 0.06595605611801147, |
| "logits/rejected": 0.18088053166866302, |
| "logps/chosen": -39.756439208984375, |
| "logps/rejected": -63.843971252441406, |
| "loss": 0.3955, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.2804669737815857, |
| "rewards/margins": 0.8086118102073669, |
| "rewards/rejected": -1.0890789031982422, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.8171021377672208, |
| "grad_norm": 48.0, |
| "learning_rate": 8.365795724465557e-07, |
| "logits/chosen": 0.1287180781364441, |
| "logits/rejected": 0.22206488251686096, |
| "logps/chosen": -41.125389099121094, |
| "logps/rejected": -61.68186950683594, |
| "loss": 0.4428, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.30739057064056396, |
| "rewards/margins": 0.7095006704330444, |
| "rewards/rejected": -1.0168912410736084, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.8266033254156769, |
| "grad_norm": 52.25, |
| "learning_rate": 8.346793349168646e-07, |
| "logits/chosen": 0.053852953016757965, |
| "logits/rejected": 0.13252206146717072, |
| "logps/chosen": -40.2414436340332, |
| "logps/rejected": -61.29638671875, |
| "loss": 0.4071, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.3235268294811249, |
| "rewards/margins": 0.811761736869812, |
| "rewards/rejected": -1.1352884769439697, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.836104513064133, |
| "grad_norm": 56.0, |
| "learning_rate": 8.327790973871733e-07, |
| "logits/chosen": 0.0468582957983017, |
| "logits/rejected": 0.11399667710065842, |
| "logps/chosen": -39.64308547973633, |
| "logps/rejected": -59.60944747924805, |
| "loss": 0.4147, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.2946662902832031, |
| "rewards/margins": 0.7918639183044434, |
| "rewards/rejected": -1.086530089378357, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.8456057007125891, |
| "grad_norm": 60.5, |
| "learning_rate": 8.308788598574822e-07, |
| "logits/chosen": 0.09335757046937943, |
| "logits/rejected": 0.10522627085447311, |
| "logps/chosen": -42.684288024902344, |
| "logps/rejected": -61.48929214477539, |
| "loss": 0.4301, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3570723533630371, |
| "rewards/margins": 0.7129464745521545, |
| "rewards/rejected": -1.0700188875198364, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.8551068883610451, |
| "grad_norm": 51.75, |
| "learning_rate": 8.289786223277909e-07, |
| "logits/chosen": 0.07347086817026138, |
| "logits/rejected": 0.15308743715286255, |
| "logps/chosen": -41.02301788330078, |
| "logps/rejected": -61.934791564941406, |
| "loss": 0.4048, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.29406049847602844, |
| "rewards/margins": 0.8210353255271912, |
| "rewards/rejected": -1.1150959730148315, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8646080760095012, |
| "grad_norm": 45.0, |
| "learning_rate": 8.270783847980998e-07, |
| "logits/chosen": 0.0793566182255745, |
| "logits/rejected": 0.17386886477470398, |
| "logps/chosen": -37.58202362060547, |
| "logps/rejected": -59.483882904052734, |
| "loss": 0.4083, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.2546355128288269, |
| "rewards/margins": 0.7988392114639282, |
| "rewards/rejected": -1.0534747838974, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.8741092636579573, |
| "grad_norm": 51.5, |
| "learning_rate": 8.251781472684085e-07, |
| "logits/chosen": 0.06765860319137573, |
| "logits/rejected": 0.17789000272750854, |
| "logps/chosen": -38.726783752441406, |
| "logps/rejected": -60.934757232666016, |
| "loss": 0.4203, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.3265742063522339, |
| "rewards/margins": 0.7777712345123291, |
| "rewards/rejected": -1.1043453216552734, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.8836104513064132, |
| "grad_norm": 56.5, |
| "learning_rate": 8.232779097387174e-07, |
| "logits/chosen": 0.05681402608752251, |
| "logits/rejected": 0.1788835972547531, |
| "logps/chosen": -38.938507080078125, |
| "logps/rejected": -61.65933609008789, |
| "loss": 0.3918, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.25624170899391174, |
| "rewards/margins": 0.8536828756332397, |
| "rewards/rejected": -1.1099246740341187, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.8931116389548693, |
| "grad_norm": 49.25, |
| "learning_rate": 8.213776722090261e-07, |
| "logits/chosen": 0.057860612869262695, |
| "logits/rejected": 0.14475533366203308, |
| "logps/chosen": -37.889766693115234, |
| "logps/rejected": -61.03107452392578, |
| "loss": 0.3916, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.23101815581321716, |
| "rewards/margins": 0.8708871603012085, |
| "rewards/rejected": -1.101905345916748, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.9026128266033254, |
| "grad_norm": 48.5, |
| "learning_rate": 8.194774346793349e-07, |
| "logits/chosen": 0.07886646687984467, |
| "logits/rejected": 0.19103488326072693, |
| "logps/chosen": -40.67872619628906, |
| "logps/rejected": -61.686668395996094, |
| "loss": 0.4185, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.3377780318260193, |
| "rewards/margins": 0.795622706413269, |
| "rewards/rejected": -1.133400797843933, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9121140142517815, |
| "grad_norm": 44.0, |
| "learning_rate": 8.175771971496437e-07, |
| "logits/chosen": 0.0845412164926529, |
| "logits/rejected": 0.16773808002471924, |
| "logps/chosen": -40.12324523925781, |
| "logps/rejected": -59.303245544433594, |
| "loss": 0.4103, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.3262190520763397, |
| "rewards/margins": 0.8095906376838684, |
| "rewards/rejected": -1.1358096599578857, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.9216152019002375, |
| "grad_norm": 46.0, |
| "learning_rate": 8.156769596199525e-07, |
| "logits/chosen": 0.08066831529140472, |
| "logits/rejected": 0.17000192403793335, |
| "logps/chosen": -40.7078742980957, |
| "logps/rejected": -62.315185546875, |
| "loss": 0.3972, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.28264638781547546, |
| "rewards/margins": 0.8620734214782715, |
| "rewards/rejected": -1.1447197198867798, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.9311163895486936, |
| "grad_norm": 37.75, |
| "learning_rate": 8.137767220902613e-07, |
| "logits/chosen": 0.08594940602779388, |
| "logits/rejected": 0.22140294313430786, |
| "logps/chosen": -38.64836120605469, |
| "logps/rejected": -62.625946044921875, |
| "loss": 0.376, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.2832408845424652, |
| "rewards/margins": 0.9163691997528076, |
| "rewards/rejected": -1.1996098756790161, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.9406175771971497, |
| "grad_norm": 61.5, |
| "learning_rate": 8.1187648456057e-07, |
| "logits/chosen": 0.07032456994056702, |
| "logits/rejected": 0.17861232161521912, |
| "logps/chosen": -39.57463836669922, |
| "logps/rejected": -60.60888671875, |
| "loss": 0.4058, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.35363835096359253, |
| "rewards/margins": 0.8107983469963074, |
| "rewards/rejected": -1.1644368171691895, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.9501187648456056, |
| "grad_norm": 52.25, |
| "learning_rate": 8.099762470308789e-07, |
| "logits/chosen": 0.08332876116037369, |
| "logits/rejected": 0.1855737268924713, |
| "logps/chosen": -38.6751594543457, |
| "logps/rejected": -59.24980163574219, |
| "loss": 0.4035, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3069637417793274, |
| "rewards/margins": 0.8082422614097595, |
| "rewards/rejected": -1.115206003189087, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9596199524940617, |
| "grad_norm": 45.75, |
| "learning_rate": 8.080760095011876e-07, |
| "logits/chosen": 0.12488888204097748, |
| "logits/rejected": 0.14733757078647614, |
| "logps/chosen": -40.226043701171875, |
| "logps/rejected": -58.75604248046875, |
| "loss": 0.4261, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.35558950901031494, |
| "rewards/margins": 0.7374365925788879, |
| "rewards/rejected": -1.0930261611938477, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.9691211401425178, |
| "grad_norm": 43.5, |
| "learning_rate": 8.061757719714965e-07, |
| "logits/chosen": 0.04622222110629082, |
| "logits/rejected": 0.1545945107936859, |
| "logps/chosen": -39.63484573364258, |
| "logps/rejected": -61.277587890625, |
| "loss": 0.3831, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3327168822288513, |
| "rewards/margins": 0.8751581311225891, |
| "rewards/rejected": -1.2078750133514404, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.9786223277909739, |
| "grad_norm": 54.0, |
| "learning_rate": 8.042755344418051e-07, |
| "logits/chosen": 0.09918095171451569, |
| "logits/rejected": 0.18718725442886353, |
| "logps/chosen": -42.15406799316406, |
| "logps/rejected": -62.292816162109375, |
| "loss": 0.4012, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.3841949701309204, |
| "rewards/margins": 0.8375911116600037, |
| "rewards/rejected": -1.2217860221862793, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.9881235154394299, |
| "grad_norm": 47.0, |
| "learning_rate": 8.02375296912114e-07, |
| "logits/chosen": 0.06581688672304153, |
| "logits/rejected": 0.16606493294239044, |
| "logps/chosen": -40.35301971435547, |
| "logps/rejected": -60.86550521850586, |
| "loss": 0.4171, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.3261529803276062, |
| "rewards/margins": 0.7935004830360413, |
| "rewards/rejected": -1.1196534633636475, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.997624703087886, |
| "grad_norm": 46.75, |
| "learning_rate": 8.004750593824227e-07, |
| "logits/chosen": 0.08406564593315125, |
| "logits/rejected": 0.1631183624267578, |
| "logps/chosen": -42.632301330566406, |
| "logps/rejected": -66.31587219238281, |
| "loss": 0.3864, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3757409453392029, |
| "rewards/margins": 0.8852084875106812, |
| "rewards/rejected": -1.2609493732452393, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.007125890736342, |
| "grad_norm": 53.5, |
| "learning_rate": 7.985748218527315e-07, |
| "logits/chosen": 0.04815902188420296, |
| "logits/rejected": 0.10080163925886154, |
| "logps/chosen": -40.61830520629883, |
| "logps/rejected": -62.35159683227539, |
| "loss": 0.3894, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.3303859233856201, |
| "rewards/margins": 0.8965721726417542, |
| "rewards/rejected": -1.2269580364227295, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.0166270783847982, |
| "grad_norm": 58.5, |
| "learning_rate": 7.966745843230403e-07, |
| "logits/chosen": 0.07239064574241638, |
| "logits/rejected": 0.1947220116853714, |
| "logps/chosen": -42.951637268066406, |
| "logps/rejected": -64.53704071044922, |
| "loss": 0.4144, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.397784024477005, |
| "rewards/margins": 0.7604966759681702, |
| "rewards/rejected": -1.158280611038208, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.0261282660332542, |
| "grad_norm": 59.75, |
| "learning_rate": 7.947743467933491e-07, |
| "logits/chosen": 0.08597906678915024, |
| "logits/rejected": 0.14526161551475525, |
| "logps/chosen": -40.39187240600586, |
| "logps/rejected": -64.09424591064453, |
| "loss": 0.3834, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.33142587542533875, |
| "rewards/margins": 0.926814079284668, |
| "rewards/rejected": -1.258239984512329, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.03562945368171, |
| "grad_norm": 40.5, |
| "learning_rate": 7.928741092636579e-07, |
| "logits/chosen": 0.0840529203414917, |
| "logits/rejected": 0.18427981436252594, |
| "logps/chosen": -42.51103591918945, |
| "logps/rejected": -64.92820739746094, |
| "loss": 0.4181, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.39203089475631714, |
| "rewards/margins": 0.7877532839775085, |
| "rewards/rejected": -1.1797842979431152, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.0451306413301662, |
| "grad_norm": 56.5, |
| "learning_rate": 7.909738717339667e-07, |
| "logits/chosen": 0.049816541373729706, |
| "logits/rejected": 0.143840953707695, |
| "logps/chosen": -41.118350982666016, |
| "logps/rejected": -61.427818298339844, |
| "loss": 0.4304, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.38534730672836304, |
| "rewards/margins": 0.7327947020530701, |
| "rewards/rejected": -1.118142008781433, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.0546318289786223, |
| "grad_norm": 59.0, |
| "learning_rate": 7.890736342042755e-07, |
| "logits/chosen": 0.09707099944353104, |
| "logits/rejected": 0.10315439105033875, |
| "logps/chosen": -39.37982940673828, |
| "logps/rejected": -61.087913513183594, |
| "loss": 0.4052, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.30062562227249146, |
| "rewards/margins": 0.834286630153656, |
| "rewards/rejected": -1.1349122524261475, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.0641330166270784, |
| "grad_norm": 46.5, |
| "learning_rate": 7.871733966745842e-07, |
| "logits/chosen": -0.01599489152431488, |
| "logits/rejected": 0.1043221652507782, |
| "logps/chosen": -42.238426208496094, |
| "logps/rejected": -64.75145721435547, |
| "loss": 0.389, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.383517324924469, |
| "rewards/margins": 0.8845040202140808, |
| "rewards/rejected": -1.2680213451385498, |
| "step": 1792 |
| }, |
| { |
| "epoch": 1.0736342042755345, |
| "grad_norm": 60.5, |
| "learning_rate": 7.852731591448931e-07, |
| "logits/chosen": 0.04319116473197937, |
| "logits/rejected": 0.10541323572397232, |
| "logps/chosen": -40.8631706237793, |
| "logps/rejected": -59.777008056640625, |
| "loss": 0.3936, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.30645447969436646, |
| "rewards/margins": 0.8425821661949158, |
| "rewards/rejected": -1.1490366458892822, |
| "step": 1808 |
| }, |
| { |
| "epoch": 1.0831353919239906, |
| "grad_norm": 48.75, |
| "learning_rate": 7.833729216152018e-07, |
| "logits/chosen": 0.10437559336423874, |
| "logits/rejected": 0.1490824967622757, |
| "logps/chosen": -39.50224304199219, |
| "logps/rejected": -62.01877975463867, |
| "loss": 0.3812, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3324194550514221, |
| "rewards/margins": 0.897403359413147, |
| "rewards/rejected": -1.2298228740692139, |
| "step": 1824 |
| }, |
| { |
| "epoch": 1.0926365795724466, |
| "grad_norm": 48.75, |
| "learning_rate": 7.814726840855107e-07, |
| "logits/chosen": -0.01841648668050766, |
| "logits/rejected": 0.10108716785907745, |
| "logps/chosen": -39.490970611572266, |
| "logps/rejected": -59.66122055053711, |
| "loss": 0.3821, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3041851818561554, |
| "rewards/margins": 0.8931726813316345, |
| "rewards/rejected": -1.1973577737808228, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.1021377672209025, |
| "grad_norm": 44.0, |
| "learning_rate": 7.795724465558194e-07, |
| "logits/chosen": 0.08540444076061249, |
| "logits/rejected": 0.18863236904144287, |
| "logps/chosen": -41.12372589111328, |
| "logps/rejected": -62.18980026245117, |
| "loss": 0.3981, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.3796708285808563, |
| "rewards/margins": 0.852729320526123, |
| "rewards/rejected": -1.2324001789093018, |
| "step": 1856 |
| }, |
| { |
| "epoch": 1.1116389548693586, |
| "grad_norm": 51.0, |
| "learning_rate": 7.776722090261282e-07, |
| "logits/chosen": 0.14734028279781342, |
| "logits/rejected": 0.21354371309280396, |
| "logps/chosen": -43.433292388916016, |
| "logps/rejected": -66.97840118408203, |
| "loss": 0.3905, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.40543994307518005, |
| "rewards/margins": 0.8719741702079773, |
| "rewards/rejected": -1.277414083480835, |
| "step": 1872 |
| }, |
| { |
| "epoch": 1.1211401425178147, |
| "grad_norm": 40.5, |
| "learning_rate": 7.75771971496437e-07, |
| "logits/chosen": 0.04771037772297859, |
| "logits/rejected": 0.14470888674259186, |
| "logps/chosen": -40.52289962768555, |
| "logps/rejected": -63.578758239746094, |
| "loss": 0.3997, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3288933336734772, |
| "rewards/margins": 0.8785545229911804, |
| "rewards/rejected": -1.20744788646698, |
| "step": 1888 |
| }, |
| { |
| "epoch": 1.1306413301662708, |
| "grad_norm": 56.25, |
| "learning_rate": 7.738717339667458e-07, |
| "logits/chosen": 0.02750781551003456, |
| "logits/rejected": 0.12446936219930649, |
| "logps/chosen": -39.09437561035156, |
| "logps/rejected": -60.32615661621094, |
| "loss": 0.3828, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.29237014055252075, |
| "rewards/margins": 0.9157527089118958, |
| "rewards/rejected": -1.208122730255127, |
| "step": 1904 |
| }, |
| { |
| "epoch": 1.1401425178147269, |
| "grad_norm": 50.25, |
| "learning_rate": 7.719714964370546e-07, |
| "logits/chosen": -0.0005194246768951416, |
| "logits/rejected": 0.11599244177341461, |
| "logps/chosen": -38.9823112487793, |
| "logps/rejected": -60.105751037597656, |
| "loss": 0.3918, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.3398272395133972, |
| "rewards/margins": 0.8596587181091309, |
| "rewards/rejected": -1.1994860172271729, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.149643705463183, |
| "grad_norm": 52.25, |
| "learning_rate": 7.700712589073634e-07, |
| "logits/chosen": 0.14513945579528809, |
| "logits/rejected": 0.1485050767660141, |
| "logps/chosen": -39.983741760253906, |
| "logps/rejected": -62.480751037597656, |
| "loss": 0.3717, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.3441847264766693, |
| "rewards/margins": 0.9267476201057434, |
| "rewards/rejected": -1.2709323167800903, |
| "step": 1936 |
| }, |
| { |
| "epoch": 1.159144893111639, |
| "grad_norm": 56.25, |
| "learning_rate": 7.681710213776722e-07, |
| "logits/chosen": 0.0954248383641243, |
| "logits/rejected": 0.15733623504638672, |
| "logps/chosen": -40.76930618286133, |
| "logps/rejected": -63.525718688964844, |
| "loss": 0.391, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.36353814601898193, |
| "rewards/margins": 0.9048362970352173, |
| "rewards/rejected": -1.2683744430541992, |
| "step": 1952 |
| }, |
| { |
| "epoch": 1.168646080760095, |
| "grad_norm": 55.5, |
| "learning_rate": 7.66270783847981e-07, |
| "logits/chosen": 0.08181347697973251, |
| "logits/rejected": 0.12661109864711761, |
| "logps/chosen": -41.755645751953125, |
| "logps/rejected": -61.083946228027344, |
| "loss": 0.3934, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.3673307001590729, |
| "rewards/margins": 0.8563836812973022, |
| "rewards/rejected": -1.2237144708633423, |
| "step": 1968 |
| }, |
| { |
| "epoch": 1.178147268408551, |
| "grad_norm": 52.0, |
| "learning_rate": 7.643705463182898e-07, |
| "logits/chosen": 0.042737413197755814, |
| "logits/rejected": 0.1984286606311798, |
| "logps/chosen": -40.4138298034668, |
| "logps/rejected": -65.36979675292969, |
| "loss": 0.3825, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.35838350653648376, |
| "rewards/margins": 0.8961877822875977, |
| "rewards/rejected": -1.2545711994171143, |
| "step": 1984 |
| }, |
| { |
| "epoch": 1.187648456057007, |
| "grad_norm": 52.0, |
| "learning_rate": 7.624703087885986e-07, |
| "logits/chosen": 0.06372962146997452, |
| "logits/rejected": 0.20503537356853485, |
| "logps/chosen": -39.09539794921875, |
| "logps/rejected": -65.76801300048828, |
| "loss": 0.3814, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.301858514547348, |
| "rewards/margins": 0.8968943953514099, |
| "rewards/rejected": -1.1987528800964355, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.1971496437054632, |
| "grad_norm": 44.0, |
| "learning_rate": 7.605700712589074e-07, |
| "logits/chosen": 0.09966941177845001, |
| "logits/rejected": 0.17389996349811554, |
| "logps/chosen": -44.16709899902344, |
| "logps/rejected": -61.92054748535156, |
| "loss": 0.4317, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -0.43170446157455444, |
| "rewards/margins": 0.799846887588501, |
| "rewards/rejected": -1.2315512895584106, |
| "step": 2016 |
| }, |
| { |
| "epoch": 1.2066508313539193, |
| "grad_norm": 41.0, |
| "learning_rate": 7.586698337292161e-07, |
| "logits/chosen": 0.0820382758975029, |
| "logits/rejected": 0.1380407214164734, |
| "logps/chosen": -40.85121536254883, |
| "logps/rejected": -61.79035568237305, |
| "loss": 0.3944, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.33426377177238464, |
| "rewards/margins": 0.8878821730613708, |
| "rewards/rejected": -1.222145915031433, |
| "step": 2032 |
| }, |
| { |
| "epoch": 1.2161520190023754, |
| "grad_norm": 49.25, |
| "learning_rate": 7.567695961995249e-07, |
| "logits/chosen": 0.07470327615737915, |
| "logits/rejected": 0.17876245081424713, |
| "logps/chosen": -40.400997161865234, |
| "logps/rejected": -62.64784240722656, |
| "loss": 0.3868, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3647741377353668, |
| "rewards/margins": 0.8964493274688721, |
| "rewards/rejected": -1.261223554611206, |
| "step": 2048 |
| }, |
| { |
| "epoch": 1.2256532066508314, |
| "grad_norm": 59.25, |
| "learning_rate": 7.548693586698337e-07, |
| "logits/chosen": 0.09447329491376877, |
| "logits/rejected": 0.20229777693748474, |
| "logps/chosen": -41.54475021362305, |
| "logps/rejected": -62.25804138183594, |
| "loss": 0.411, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.39685580134391785, |
| "rewards/margins": 0.8458825945854187, |
| "rewards/rejected": -1.2427384853363037, |
| "step": 2064 |
| }, |
| { |
| "epoch": 1.2351543942992875, |
| "grad_norm": 46.75, |
| "learning_rate": 7.529691211401425e-07, |
| "logits/chosen": 0.005977040156722069, |
| "logits/rejected": 0.12502533197402954, |
| "logps/chosen": -39.64662170410156, |
| "logps/rejected": -61.320465087890625, |
| "loss": 0.3843, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.35564541816711426, |
| "rewards/margins": 0.9019960165023804, |
| "rewards/rejected": -1.2576414346694946, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.2446555819477434, |
| "grad_norm": 56.75, |
| "learning_rate": 7.510688836104513e-07, |
| "logits/chosen": 0.08646165579557419, |
| "logits/rejected": 0.19107480347156525, |
| "logps/chosen": -41.432594299316406, |
| "logps/rejected": -63.792335510253906, |
| "loss": 0.4067, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.4125351309776306, |
| "rewards/margins": 0.8356191515922546, |
| "rewards/rejected": -1.2481542825698853, |
| "step": 2096 |
| }, |
| { |
| "epoch": 1.2541567695961995, |
| "grad_norm": 50.0, |
| "learning_rate": 7.4916864608076e-07, |
| "logits/chosen": 0.0462346225976944, |
| "logits/rejected": 0.18776997923851013, |
| "logps/chosen": -39.760746002197266, |
| "logps/rejected": -62.441749572753906, |
| "loss": 0.3845, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.30308061838150024, |
| "rewards/margins": 0.87934410572052, |
| "rewards/rejected": -1.182424783706665, |
| "step": 2112 |
| }, |
| { |
| "epoch": 1.2636579572446556, |
| "grad_norm": 46.25, |
| "learning_rate": 7.472684085510688e-07, |
| "logits/chosen": 0.08780578523874283, |
| "logits/rejected": 0.16588638722896576, |
| "logps/chosen": -41.076515197753906, |
| "logps/rejected": -62.57638931274414, |
| "loss": 0.3855, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.3240537941455841, |
| "rewards/margins": 0.8670827150344849, |
| "rewards/rejected": -1.191136360168457, |
| "step": 2128 |
| }, |
| { |
| "epoch": 1.2731591448931117, |
| "grad_norm": 41.0, |
| "learning_rate": 7.453681710213776e-07, |
| "logits/chosen": 0.11139998584985733, |
| "logits/rejected": 0.18487989902496338, |
| "logps/chosen": -40.500728607177734, |
| "logps/rejected": -61.71664810180664, |
| "loss": 0.4028, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.3737491965293884, |
| "rewards/margins": 0.8448764085769653, |
| "rewards/rejected": -1.218625545501709, |
| "step": 2144 |
| }, |
| { |
| "epoch": 1.2826603325415677, |
| "grad_norm": 51.25, |
| "learning_rate": 7.434679334916864e-07, |
| "logits/chosen": 0.05119692161679268, |
| "logits/rejected": 0.12550300359725952, |
| "logps/chosen": -43.28956604003906, |
| "logps/rejected": -63.49314880371094, |
| "loss": 0.3864, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.43977251648902893, |
| "rewards/margins": 0.8914352655410767, |
| "rewards/rejected": -1.3312077522277832, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.2921615201900236, |
| "grad_norm": 66.0, |
| "learning_rate": 7.415676959619952e-07, |
| "logits/chosen": 0.023000139743089676, |
| "logits/rejected": 0.10380115360021591, |
| "logps/chosen": -40.769859313964844, |
| "logps/rejected": -61.942955017089844, |
| "loss": 0.3927, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3861158490180969, |
| "rewards/margins": 0.8635023832321167, |
| "rewards/rejected": -1.2496182918548584, |
| "step": 2176 |
| }, |
| { |
| "epoch": 1.3016627078384797, |
| "grad_norm": 56.0, |
| "learning_rate": 7.39667458432304e-07, |
| "logits/chosen": 0.08992882817983627, |
| "logits/rejected": 0.16640856862068176, |
| "logps/chosen": -41.653472900390625, |
| "logps/rejected": -65.48009490966797, |
| "loss": 0.3645, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.3618718385696411, |
| "rewards/margins": 0.9549592733383179, |
| "rewards/rejected": -1.3168312311172485, |
| "step": 2192 |
| }, |
| { |
| "epoch": 1.3111638954869358, |
| "grad_norm": 48.5, |
| "learning_rate": 7.377672209026128e-07, |
| "logits/chosen": 0.022954029962420464, |
| "logits/rejected": 0.1399441808462143, |
| "logps/chosen": -40.20348358154297, |
| "logps/rejected": -62.03968811035156, |
| "loss": 0.3814, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.33127692341804504, |
| "rewards/margins": 0.9193905591964722, |
| "rewards/rejected": -1.250667691230774, |
| "step": 2208 |
| }, |
| { |
| "epoch": 1.3206650831353919, |
| "grad_norm": 45.0, |
| "learning_rate": 7.358669833729216e-07, |
| "logits/chosen": 0.05273166671395302, |
| "logits/rejected": 0.10670921206474304, |
| "logps/chosen": -40.3055419921875, |
| "logps/rejected": -60.818115234375, |
| "loss": 0.4315, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.36937782168388367, |
| "rewards/margins": 0.7719258666038513, |
| "rewards/rejected": -1.1413036584854126, |
| "step": 2224 |
| }, |
| { |
| "epoch": 1.330166270783848, |
| "grad_norm": 46.5, |
| "learning_rate": 7.339667458432304e-07, |
| "logits/chosen": 0.020988432690501213, |
| "logits/rejected": 0.13837015628814697, |
| "logps/chosen": -38.807682037353516, |
| "logps/rejected": -61.88154983520508, |
| "loss": 0.3805, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.2889925241470337, |
| "rewards/margins": 0.9347207546234131, |
| "rewards/rejected": -1.2237131595611572, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.339667458432304, |
| "grad_norm": 49.25, |
| "learning_rate": 7.320665083135391e-07, |
| "logits/chosen": 0.09183872491121292, |
| "logits/rejected": 0.19175560772418976, |
| "logps/chosen": -42.616172790527344, |
| "logps/rejected": -64.80868530273438, |
| "loss": 0.3724, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.3987230360507965, |
| "rewards/margins": 0.9598705768585205, |
| "rewards/rejected": -1.3585937023162842, |
| "step": 2256 |
| }, |
| { |
| "epoch": 1.3491686460807601, |
| "grad_norm": 56.75, |
| "learning_rate": 7.301662707838479e-07, |
| "logits/chosen": 0.04315632954239845, |
| "logits/rejected": 0.12694051861763, |
| "logps/chosen": -42.23524856567383, |
| "logps/rejected": -63.53181076049805, |
| "loss": 0.4178, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.45748934149742126, |
| "rewards/margins": 0.7994433045387268, |
| "rewards/rejected": -1.2569326162338257, |
| "step": 2272 |
| }, |
| { |
| "epoch": 1.3586698337292162, |
| "grad_norm": 51.75, |
| "learning_rate": 7.282660332541567e-07, |
| "logits/chosen": 0.05002054572105408, |
| "logits/rejected": 0.10560965538024902, |
| "logps/chosen": -41.06108474731445, |
| "logps/rejected": -61.318199157714844, |
| "loss": 0.3899, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.39432668685913086, |
| "rewards/margins": 0.906670331954956, |
| "rewards/rejected": -1.300997018814087, |
| "step": 2288 |
| }, |
| { |
| "epoch": 1.3681710213776723, |
| "grad_norm": 59.0, |
| "learning_rate": 7.263657957244655e-07, |
| "logits/chosen": 0.0978277325630188, |
| "logits/rejected": 0.1469835340976715, |
| "logps/chosen": -40.50400161743164, |
| "logps/rejected": -64.60050964355469, |
| "loss": 0.3702, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.39239317178726196, |
| "rewards/margins": 0.9568536281585693, |
| "rewards/rejected": -1.3492467403411865, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.3776722090261282, |
| "grad_norm": 50.25, |
| "learning_rate": 7.244655581947743e-07, |
| "logits/chosen": 0.0486019104719162, |
| "logits/rejected": 0.17572590708732605, |
| "logps/chosen": -40.24808883666992, |
| "logps/rejected": -61.60661697387695, |
| "loss": 0.4009, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.3608768582344055, |
| "rewards/margins": 0.831383466720581, |
| "rewards/rejected": -1.1922603845596313, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.3871733966745843, |
| "grad_norm": 54.25, |
| "learning_rate": 7.225653206650831e-07, |
| "logits/chosen": 0.03973031044006348, |
| "logits/rejected": 0.14902335405349731, |
| "logps/chosen": -39.89193344116211, |
| "logps/rejected": -61.921695709228516, |
| "loss": 0.3674, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.3707820177078247, |
| "rewards/margins": 0.9807763695716858, |
| "rewards/rejected": -1.3515583276748657, |
| "step": 2336 |
| }, |
| { |
| "epoch": 1.3966745843230404, |
| "grad_norm": 45.25, |
| "learning_rate": 7.206650831353919e-07, |
| "logits/chosen": 0.01807180978357792, |
| "logits/rejected": 0.14278706908226013, |
| "logps/chosen": -39.26378631591797, |
| "logps/rejected": -60.40557861328125, |
| "loss": 0.3723, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.36147797107696533, |
| "rewards/margins": 0.9218441247940063, |
| "rewards/rejected": -1.2833220958709717, |
| "step": 2352 |
| }, |
| { |
| "epoch": 1.4061757719714965, |
| "grad_norm": 75.5, |
| "learning_rate": 7.187648456057007e-07, |
| "logits/chosen": 0.07302900403738022, |
| "logits/rejected": 0.14269563555717468, |
| "logps/chosen": -41.234161376953125, |
| "logps/rejected": -66.46778869628906, |
| "loss": 0.3693, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.39566075801849365, |
| "rewards/margins": 0.9585368633270264, |
| "rewards/rejected": -1.35419762134552, |
| "step": 2368 |
| }, |
| { |
| "epoch": 1.4156769596199525, |
| "grad_norm": 40.75, |
| "learning_rate": 7.168646080760095e-07, |
| "logits/chosen": 0.051808636635541916, |
| "logits/rejected": 0.21229980885982513, |
| "logps/chosen": -40.08659744262695, |
| "logps/rejected": -61.43524932861328, |
| "loss": 0.3833, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.37025976181030273, |
| "rewards/margins": 0.8906149864196777, |
| "rewards/rejected": -1.2608747482299805, |
| "step": 2384 |
| }, |
| { |
| "epoch": 1.4251781472684084, |
| "grad_norm": 44.75, |
| "learning_rate": 7.149643705463183e-07, |
| "logits/chosen": 0.0380956195294857, |
| "logits/rejected": 0.13877098262310028, |
| "logps/chosen": -42.67876052856445, |
| "logps/rejected": -63.79023361206055, |
| "loss": 0.3799, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3804953396320343, |
| "rewards/margins": 0.9205693006515503, |
| "rewards/rejected": -1.3010647296905518, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.4346793349168645, |
| "grad_norm": 51.5, |
| "learning_rate": 7.130641330166271e-07, |
| "logits/chosen": 0.08207520097494125, |
| "logits/rejected": 0.14595362544059753, |
| "logps/chosen": -42.30644989013672, |
| "logps/rejected": -63.68936538696289, |
| "loss": 0.3859, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.4032093584537506, |
| "rewards/margins": 0.8909638524055481, |
| "rewards/rejected": -1.294173240661621, |
| "step": 2416 |
| }, |
| { |
| "epoch": 1.4441805225653206, |
| "grad_norm": 47.0, |
| "learning_rate": 7.111638954869358e-07, |
| "logits/chosen": 0.017898384481668472, |
| "logits/rejected": 0.18962475657463074, |
| "logps/chosen": -41.686912536621094, |
| "logps/rejected": -62.23005294799805, |
| "loss": 0.3961, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3815171718597412, |
| "rewards/margins": 0.8675276637077332, |
| "rewards/rejected": -1.2490447759628296, |
| "step": 2432 |
| }, |
| { |
| "epoch": 1.4536817102137767, |
| "grad_norm": 46.5, |
| "learning_rate": 7.092636579572447e-07, |
| "logits/chosen": 0.07278081774711609, |
| "logits/rejected": 0.13306282460689545, |
| "logps/chosen": -39.96598815917969, |
| "logps/rejected": -62.889739990234375, |
| "loss": 0.3467, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -0.3247521221637726, |
| "rewards/margins": 1.0032382011413574, |
| "rewards/rejected": -1.3279902935028076, |
| "step": 2448 |
| }, |
| { |
| "epoch": 1.4631828978622328, |
| "grad_norm": 46.75, |
| "learning_rate": 7.073634204275534e-07, |
| "logits/chosen": 0.028041554614901543, |
| "logits/rejected": 0.126488596200943, |
| "logps/chosen": -42.11606216430664, |
| "logps/rejected": -60.70077896118164, |
| "loss": 0.4265, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.48028531670570374, |
| "rewards/margins": 0.801145076751709, |
| "rewards/rejected": -1.2814303636550903, |
| "step": 2464 |
| }, |
| { |
| "epoch": 1.4726840855106889, |
| "grad_norm": 52.75, |
| "learning_rate": 7.054631828978623e-07, |
| "logits/chosen": 0.05731602758169174, |
| "logits/rejected": 0.13755974173545837, |
| "logps/chosen": -39.297462463378906, |
| "logps/rejected": -62.45978546142578, |
| "loss": 0.373, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.31363803148269653, |
| "rewards/margins": 0.9406128525733948, |
| "rewards/rejected": -1.2542507648468018, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.482185273159145, |
| "grad_norm": 42.0, |
| "learning_rate": 7.03562945368171e-07, |
| "logits/chosen": 0.0816030278801918, |
| "logits/rejected": 0.14841099083423615, |
| "logps/chosen": -41.25545883178711, |
| "logps/rejected": -61.70344924926758, |
| "loss": 0.4112, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.4528937041759491, |
| "rewards/margins": 0.8154016733169556, |
| "rewards/rejected": -1.268295407295227, |
| "step": 2496 |
| }, |
| { |
| "epoch": 1.491686460807601, |
| "grad_norm": 47.75, |
| "learning_rate": 7.016627078384798e-07, |
| "logits/chosen": 0.07149530947208405, |
| "logits/rejected": 0.17929969727993011, |
| "logps/chosen": -41.5832633972168, |
| "logps/rejected": -67.25575256347656, |
| "loss": 0.3869, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.41033345460891724, |
| "rewards/margins": 0.9050929546356201, |
| "rewards/rejected": -1.3154264688491821, |
| "step": 2512 |
| }, |
| { |
| "epoch": 1.5011876484560571, |
| "grad_norm": 43.5, |
| "learning_rate": 6.997624703087886e-07, |
| "logits/chosen": 0.07096850126981735, |
| "logits/rejected": 0.14748625457286835, |
| "logps/chosen": -42.1703987121582, |
| "logps/rejected": -63.72838592529297, |
| "loss": 0.3933, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.43840450048446655, |
| "rewards/margins": 0.9121676683425903, |
| "rewards/rejected": -1.3505722284317017, |
| "step": 2528 |
| }, |
| { |
| "epoch": 1.5106888361045132, |
| "grad_norm": 40.25, |
| "learning_rate": 6.978622327790974e-07, |
| "logits/chosen": 0.07429444789886475, |
| "logits/rejected": 0.169959157705307, |
| "logps/chosen": -42.15153121948242, |
| "logps/rejected": -67.66064453125, |
| "loss": 0.3566, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.385272741317749, |
| "rewards/margins": 1.010432481765747, |
| "rewards/rejected": -1.3957051038742065, |
| "step": 2544 |
| }, |
| { |
| "epoch": 1.520190023752969, |
| "grad_norm": 46.5, |
| "learning_rate": 6.959619952494062e-07, |
| "logits/chosen": 0.01588086411356926, |
| "logits/rejected": 0.1419978141784668, |
| "logps/chosen": -42.120887756347656, |
| "logps/rejected": -65.12626647949219, |
| "loss": 0.3811, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.4491952657699585, |
| "rewards/margins": 0.9161649942398071, |
| "rewards/rejected": -1.3653602600097656, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.5296912114014252, |
| "grad_norm": 45.25, |
| "learning_rate": 6.94061757719715e-07, |
| "logits/chosen": 0.09590751677751541, |
| "logits/rejected": 0.20653869211673737, |
| "logps/chosen": -43.40055465698242, |
| "logps/rejected": -65.3685302734375, |
| "loss": 0.3946, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.42235833406448364, |
| "rewards/margins": 0.85853511095047, |
| "rewards/rejected": -1.2808934450149536, |
| "step": 2576 |
| }, |
| { |
| "epoch": 1.5391923990498813, |
| "grad_norm": 48.5, |
| "learning_rate": 6.921615201900237e-07, |
| "logits/chosen": 0.10696500539779663, |
| "logits/rejected": 0.1343913972377777, |
| "logps/chosen": -42.464046478271484, |
| "logps/rejected": -63.380943298339844, |
| "loss": 0.4106, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.44905325770378113, |
| "rewards/margins": 0.8345274925231934, |
| "rewards/rejected": -1.2835807800292969, |
| "step": 2592 |
| }, |
| { |
| "epoch": 1.5486935866983373, |
| "grad_norm": 57.0, |
| "learning_rate": 6.902612826603324e-07, |
| "logits/chosen": 0.04652204364538193, |
| "logits/rejected": 0.10454034060239792, |
| "logps/chosen": -40.00874328613281, |
| "logps/rejected": -60.814361572265625, |
| "loss": 0.3879, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.38040611147880554, |
| "rewards/margins": 0.8988234400749207, |
| "rewards/rejected": -1.279229760169983, |
| "step": 2608 |
| }, |
| { |
| "epoch": 1.5581947743467932, |
| "grad_norm": 51.25, |
| "learning_rate": 6.883610451306413e-07, |
| "logits/chosen": 0.11851513385772705, |
| "logits/rejected": 0.14123034477233887, |
| "logps/chosen": -41.09252166748047, |
| "logps/rejected": -64.00010681152344, |
| "loss": 0.3637, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3220548927783966, |
| "rewards/margins": 1.0083606243133545, |
| "rewards/rejected": -1.3304154872894287, |
| "step": 2624 |
| }, |
| { |
| "epoch": 1.5676959619952493, |
| "grad_norm": 50.75, |
| "learning_rate": 6.8646080760095e-07, |
| "logits/chosen": 0.016296017915010452, |
| "logits/rejected": 0.11665618419647217, |
| "logps/chosen": -39.934112548828125, |
| "logps/rejected": -61.658660888671875, |
| "loss": 0.3787, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.35257166624069214, |
| "rewards/margins": 0.9432335495948792, |
| "rewards/rejected": -1.2958052158355713, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.5771971496437054, |
| "grad_norm": 47.75, |
| "learning_rate": 6.845605700712589e-07, |
| "logits/chosen": 0.029370354488492012, |
| "logits/rejected": 0.13856028020381927, |
| "logps/chosen": -40.105552673339844, |
| "logps/rejected": -66.02179718017578, |
| "loss": 0.3523, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3618098795413971, |
| "rewards/margins": 1.0319128036499023, |
| "rewards/rejected": -1.3937227725982666, |
| "step": 2656 |
| }, |
| { |
| "epoch": 1.5866983372921615, |
| "grad_norm": 56.25, |
| "learning_rate": 6.826603325415676e-07, |
| "logits/chosen": 0.020885644480586052, |
| "logits/rejected": 0.17086170613765717, |
| "logps/chosen": -39.169334411621094, |
| "logps/rejected": -64.6575698852539, |
| "loss": 0.3559, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.3273469805717468, |
| "rewards/margins": 0.9927965998649597, |
| "rewards/rejected": -1.320143461227417, |
| "step": 2672 |
| }, |
| { |
| "epoch": 1.5961995249406176, |
| "grad_norm": 51.75, |
| "learning_rate": 6.807600950118765e-07, |
| "logits/chosen": 0.03238772973418236, |
| "logits/rejected": 0.07314316928386688, |
| "logps/chosen": -42.21445083618164, |
| "logps/rejected": -64.58654022216797, |
| "loss": 0.378, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4178285002708435, |
| "rewards/margins": 0.9217057228088379, |
| "rewards/rejected": -1.3395342826843262, |
| "step": 2688 |
| }, |
| { |
| "epoch": 1.6057007125890737, |
| "grad_norm": 46.0, |
| "learning_rate": 6.788598574821852e-07, |
| "logits/chosen": 0.13759638369083405, |
| "logits/rejected": 0.21088193356990814, |
| "logps/chosen": -41.520233154296875, |
| "logps/rejected": -66.49211120605469, |
| "loss": 0.364, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.36787232756614685, |
| "rewards/margins": 0.9915460348129272, |
| "rewards/rejected": -1.359418511390686, |
| "step": 2704 |
| }, |
| { |
| "epoch": 1.6152019002375297, |
| "grad_norm": 43.5, |
| "learning_rate": 6.76959619952494e-07, |
| "logits/chosen": 0.03905269503593445, |
| "logits/rejected": 0.13661767542362213, |
| "logps/chosen": -38.46835708618164, |
| "logps/rejected": -60.601619720458984, |
| "loss": 0.3568, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": -0.31080418825149536, |
| "rewards/margins": 0.9947383999824524, |
| "rewards/rejected": -1.3055424690246582, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.6247030878859858, |
| "grad_norm": 44.0, |
| "learning_rate": 6.750593824228028e-07, |
| "logits/chosen": 0.07454045116901398, |
| "logits/rejected": 0.17212067544460297, |
| "logps/chosen": -40.81611251831055, |
| "logps/rejected": -62.773468017578125, |
| "loss": 0.3971, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4132643938064575, |
| "rewards/margins": 0.8847667574882507, |
| "rewards/rejected": -1.2980310916900635, |
| "step": 2736 |
| }, |
| { |
| "epoch": 1.634204275534442, |
| "grad_norm": 38.75, |
| "learning_rate": 6.731591448931116e-07, |
| "logits/chosen": 0.0922178328037262, |
| "logits/rejected": 0.13588553667068481, |
| "logps/chosen": -39.945396423339844, |
| "logps/rejected": -61.7259407043457, |
| "loss": 0.3703, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.35344234108924866, |
| "rewards/margins": 0.9801003336906433, |
| "rewards/rejected": -1.3335425853729248, |
| "step": 2752 |
| }, |
| { |
| "epoch": 1.643705463182898, |
| "grad_norm": 55.25, |
| "learning_rate": 6.712589073634204e-07, |
| "logits/chosen": 0.09883704781532288, |
| "logits/rejected": 0.133531391620636, |
| "logps/chosen": -40.637210845947266, |
| "logps/rejected": -62.80479431152344, |
| "loss": 0.3582, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.34419822692871094, |
| "rewards/margins": 1.0021624565124512, |
| "rewards/rejected": -1.3463605642318726, |
| "step": 2768 |
| }, |
| { |
| "epoch": 1.6532066508313539, |
| "grad_norm": 35.75, |
| "learning_rate": 6.693586698337292e-07, |
| "logits/chosen": 0.06989182531833649, |
| "logits/rejected": 0.08873751759529114, |
| "logps/chosen": -37.793495178222656, |
| "logps/rejected": -60.03639602661133, |
| "loss": 0.3492, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.29584282636642456, |
| "rewards/margins": 1.0541434288024902, |
| "rewards/rejected": -1.3499860763549805, |
| "step": 2784 |
| }, |
| { |
| "epoch": 1.66270783847981, |
| "grad_norm": 44.5, |
| "learning_rate": 6.67458432304038e-07, |
| "logits/chosen": -0.0033259475603699684, |
| "logits/rejected": 0.14055626094341278, |
| "logps/chosen": -41.03327178955078, |
| "logps/rejected": -61.638668060302734, |
| "loss": 0.4027, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4033549129962921, |
| "rewards/margins": 0.8529766798019409, |
| "rewards/rejected": -1.2563316822052002, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.672209026128266, |
| "grad_norm": 55.25, |
| "learning_rate": 6.655581947743467e-07, |
| "logits/chosen": 0.054215628653764725, |
| "logits/rejected": 0.1540539413690567, |
| "logps/chosen": -42.16969299316406, |
| "logps/rejected": -63.94367218017578, |
| "loss": 0.3909, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4552299976348877, |
| "rewards/margins": 0.8886625170707703, |
| "rewards/rejected": -1.3438924551010132, |
| "step": 2816 |
| }, |
| { |
| "epoch": 1.6817102137767221, |
| "grad_norm": 62.0, |
| "learning_rate": 6.636579572446556e-07, |
| "logits/chosen": 0.054992396384477615, |
| "logits/rejected": 0.13705193996429443, |
| "logps/chosen": -40.24445724487305, |
| "logps/rejected": -60.925933837890625, |
| "loss": 0.3809, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.36651647090911865, |
| "rewards/margins": 0.9117545485496521, |
| "rewards/rejected": -1.278270959854126, |
| "step": 2832 |
| }, |
| { |
| "epoch": 1.691211401425178, |
| "grad_norm": 49.25, |
| "learning_rate": 6.617577197149643e-07, |
| "logits/chosen": 0.11320605874061584, |
| "logits/rejected": 0.17928367853164673, |
| "logps/chosen": -39.65250015258789, |
| "logps/rejected": -62.30231475830078, |
| "loss": 0.3734, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3596932888031006, |
| "rewards/margins": 0.9421689510345459, |
| "rewards/rejected": -1.3018622398376465, |
| "step": 2848 |
| }, |
| { |
| "epoch": 1.700712589073634, |
| "grad_norm": 40.0, |
| "learning_rate": 6.598574821852732e-07, |
| "logits/chosen": 0.025672361254692078, |
| "logits/rejected": 0.07097212225198746, |
| "logps/chosen": -41.138633728027344, |
| "logps/rejected": -63.86958694458008, |
| "loss": 0.3738, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.39380064606666565, |
| "rewards/margins": 0.9426943063735962, |
| "rewards/rejected": -1.3364949226379395, |
| "step": 2864 |
| }, |
| { |
| "epoch": 1.7102137767220902, |
| "grad_norm": 50.75, |
| "learning_rate": 6.579572446555819e-07, |
| "logits/chosen": 0.07025223225355148, |
| "logits/rejected": 0.06127552688121796, |
| "logps/chosen": -41.751319885253906, |
| "logps/rejected": -61.100685119628906, |
| "loss": 0.3783, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.3228084146976471, |
| "rewards/margins": 0.9433329105377197, |
| "rewards/rejected": -1.2661415338516235, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.7197149643705463, |
| "grad_norm": 50.75, |
| "learning_rate": 6.560570071258908e-07, |
| "logits/chosen": -0.0122856879606843, |
| "logits/rejected": 0.08213039487600327, |
| "logps/chosen": -39.67725372314453, |
| "logps/rejected": -61.21652603149414, |
| "loss": 0.3817, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.39572060108184814, |
| "rewards/margins": 0.9105318188667297, |
| "rewards/rejected": -1.306252360343933, |
| "step": 2896 |
| }, |
| { |
| "epoch": 1.7292161520190024, |
| "grad_norm": 64.5, |
| "learning_rate": 6.541567695961995e-07, |
| "logits/chosen": 0.011255351826548576, |
| "logits/rejected": 0.10233234614133835, |
| "logps/chosen": -42.118900299072266, |
| "logps/rejected": -61.916690826416016, |
| "loss": 0.4019, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.41262125968933105, |
| "rewards/margins": 0.8827176094055176, |
| "rewards/rejected": -1.2953388690948486, |
| "step": 2912 |
| }, |
| { |
| "epoch": 1.7387173396674585, |
| "grad_norm": 44.25, |
| "learning_rate": 6.522565320665084e-07, |
| "logits/chosen": 0.07264780253171921, |
| "logits/rejected": 0.10011003911495209, |
| "logps/chosen": -41.43605422973633, |
| "logps/rejected": -59.60805892944336, |
| "loss": 0.406, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.3696948289871216, |
| "rewards/margins": 0.8629406690597534, |
| "rewards/rejected": -1.2326353788375854, |
| "step": 2928 |
| }, |
| { |
| "epoch": 1.7482185273159145, |
| "grad_norm": 41.0, |
| "learning_rate": 6.503562945368171e-07, |
| "logits/chosen": 0.05529964715242386, |
| "logits/rejected": 0.17115283012390137, |
| "logps/chosen": -40.79829788208008, |
| "logps/rejected": -65.05158996582031, |
| "loss": 0.3523, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.336140513420105, |
| "rewards/margins": 1.0045894384384155, |
| "rewards/rejected": -1.340729832649231, |
| "step": 2944 |
| }, |
| { |
| "epoch": 1.7577197149643706, |
| "grad_norm": 45.5, |
| "learning_rate": 6.484560570071259e-07, |
| "logits/chosen": 0.04629014432430267, |
| "logits/rejected": 0.1502920687198639, |
| "logps/chosen": -41.52702713012695, |
| "logps/rejected": -62.9970817565918, |
| "loss": 0.3898, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.4380848705768585, |
| "rewards/margins": 0.9207091331481934, |
| "rewards/rejected": -1.3587939739227295, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.7672209026128267, |
| "grad_norm": 44.5, |
| "learning_rate": 6.465558194774347e-07, |
| "logits/chosen": -0.011330801993608475, |
| "logits/rejected": 0.10742415487766266, |
| "logps/chosen": -40.159461975097656, |
| "logps/rejected": -64.75343322753906, |
| "loss": 0.3439, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3370579481124878, |
| "rewards/margins": 1.050633192062378, |
| "rewards/rejected": -1.3876910209655762, |
| "step": 2976 |
| }, |
| { |
| "epoch": 1.7767220902612828, |
| "grad_norm": 45.75, |
| "learning_rate": 6.446555819477434e-07, |
| "logits/chosen": 0.06578174233436584, |
| "logits/rejected": 0.1567022204399109, |
| "logps/chosen": -41.37073516845703, |
| "logps/rejected": -63.61731719970703, |
| "loss": 0.4027, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.42729830741882324, |
| "rewards/margins": 0.8533509373664856, |
| "rewards/rejected": -1.280649185180664, |
| "step": 2992 |
| }, |
| { |
| "epoch": 1.7862232779097387, |
| "grad_norm": 45.0, |
| "learning_rate": 6.427553444180523e-07, |
| "logits/chosen": 0.028342464938759804, |
| "logits/rejected": 0.17107811570167542, |
| "logps/chosen": -39.00218963623047, |
| "logps/rejected": -63.736061096191406, |
| "loss": 0.3565, |
| "rewards/accuracies": 0.9765625, |
| "rewards/chosen": -0.35766667127609253, |
| "rewards/margins": 0.9768067598342896, |
| "rewards/rejected": -1.3344734907150269, |
| "step": 3008 |
| }, |
| { |
| "epoch": 1.7957244655581948, |
| "grad_norm": 38.25, |
| "learning_rate": 6.40855106888361e-07, |
| "logits/chosen": 0.011551400646567345, |
| "logits/rejected": 0.13449110090732574, |
| "logps/chosen": -41.99809646606445, |
| "logps/rejected": -65.39181518554688, |
| "loss": 0.393, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.4379570782184601, |
| "rewards/margins": 0.8674665689468384, |
| "rewards/rejected": -1.3054237365722656, |
| "step": 3024 |
| }, |
| { |
| "epoch": 1.8052256532066508, |
| "grad_norm": 63.5, |
| "learning_rate": 6.389548693586699e-07, |
| "logits/chosen": 0.05890597403049469, |
| "logits/rejected": 0.1898549646139145, |
| "logps/chosen": -39.68592834472656, |
| "logps/rejected": -65.33939361572266, |
| "loss": 0.3571, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.34970682859420776, |
| "rewards/margins": 1.0283163785934448, |
| "rewards/rejected": -1.3780232667922974, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.814726840855107, |
| "grad_norm": 58.25, |
| "learning_rate": 6.370546318289785e-07, |
| "logits/chosen": 0.019725359976291656, |
| "logits/rejected": 0.15305927395820618, |
| "logps/chosen": -42.078033447265625, |
| "logps/rejected": -61.74312973022461, |
| "loss": 0.4095, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.4093819856643677, |
| "rewards/margins": 0.836819052696228, |
| "rewards/rejected": -1.2462011575698853, |
| "step": 3056 |
| }, |
| { |
| "epoch": 1.8242280285035628, |
| "grad_norm": 53.0, |
| "learning_rate": 6.351543942992874e-07, |
| "logits/chosen": 0.057205233722925186, |
| "logits/rejected": 0.1036025658249855, |
| "logps/chosen": -42.514862060546875, |
| "logps/rejected": -60.93022155761719, |
| "loss": 0.4002, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.3770271837711334, |
| "rewards/margins": 0.8727684617042542, |
| "rewards/rejected": -1.24979567527771, |
| "step": 3072 |
| }, |
| { |
| "epoch": 1.833729216152019, |
| "grad_norm": 46.0, |
| "learning_rate": 6.332541567695961e-07, |
| "logits/chosen": 0.032310646027326584, |
| "logits/rejected": 0.1559012234210968, |
| "logps/chosen": -43.822120666503906, |
| "logps/rejected": -64.49073791503906, |
| "loss": 0.4014, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.49134525656700134, |
| "rewards/margins": 0.861031174659729, |
| "rewards/rejected": -1.3523763418197632, |
| "step": 3088 |
| }, |
| { |
| "epoch": 1.843230403800475, |
| "grad_norm": 61.75, |
| "learning_rate": 6.31353919239905e-07, |
| "logits/chosen": 0.004301354289054871, |
| "logits/rejected": 0.08244706690311432, |
| "logps/chosen": -38.925811767578125, |
| "logps/rejected": -63.42317199707031, |
| "loss": 0.3441, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.34742915630340576, |
| "rewards/margins": 1.040870189666748, |
| "rewards/rejected": -1.3882992267608643, |
| "step": 3104 |
| }, |
| { |
| "epoch": 1.852731591448931, |
| "grad_norm": 54.25, |
| "learning_rate": 6.294536817102137e-07, |
| "logits/chosen": -0.00210411474108696, |
| "logits/rejected": 0.1236819252371788, |
| "logps/chosen": -38.81071090698242, |
| "logps/rejected": -61.1712646484375, |
| "loss": 0.3845, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.37758293747901917, |
| "rewards/margins": 0.8995540738105774, |
| "rewards/rejected": -1.2771369218826294, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.8622327790973872, |
| "grad_norm": 50.0, |
| "learning_rate": 6.275534441805226e-07, |
| "logits/chosen": 0.04914906993508339, |
| "logits/rejected": 0.13415980339050293, |
| "logps/chosen": -41.947120666503906, |
| "logps/rejected": -61.855255126953125, |
| "loss": 0.3982, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4452968239784241, |
| "rewards/margins": 0.8472458720207214, |
| "rewards/rejected": -1.2925426959991455, |
| "step": 3136 |
| }, |
| { |
| "epoch": 1.8717339667458432, |
| "grad_norm": 46.25, |
| "learning_rate": 6.256532066508313e-07, |
| "logits/chosen": 0.0254144836217165, |
| "logits/rejected": 0.12364037334918976, |
| "logps/chosen": -41.690284729003906, |
| "logps/rejected": -62.46638870239258, |
| "loss": 0.3497, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.3699144124984741, |
| "rewards/margins": 1.013372540473938, |
| "rewards/rejected": -1.383286952972412, |
| "step": 3152 |
| }, |
| { |
| "epoch": 1.8812351543942993, |
| "grad_norm": 46.25, |
| "learning_rate": 6.2375296912114e-07, |
| "logits/chosen": 0.03663626313209534, |
| "logits/rejected": 0.14503881335258484, |
| "logps/chosen": -41.162559509277344, |
| "logps/rejected": -64.2937240600586, |
| "loss": 0.355, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.39480462670326233, |
| "rewards/margins": 1.0223582983016968, |
| "rewards/rejected": -1.4171628952026367, |
| "step": 3168 |
| }, |
| { |
| "epoch": 1.8907363420427554, |
| "grad_norm": 40.0, |
| "learning_rate": 6.218527315914489e-07, |
| "logits/chosen": 0.0167838204652071, |
| "logits/rejected": 0.09799753874540329, |
| "logps/chosen": -39.695152282714844, |
| "logps/rejected": -64.00861358642578, |
| "loss": 0.3409, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3396395742893219, |
| "rewards/margins": 1.0993303060531616, |
| "rewards/rejected": -1.4389699697494507, |
| "step": 3184 |
| }, |
| { |
| "epoch": 1.9002375296912115, |
| "grad_norm": 55.5, |
| "learning_rate": 6.199524940617576e-07, |
| "logits/chosen": 0.04460003226995468, |
| "logits/rejected": 0.14700554311275482, |
| "logps/chosen": -44.2281494140625, |
| "logps/rejected": -61.84652328491211, |
| "loss": 0.4352, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.5021332502365112, |
| "rewards/margins": 0.7495644688606262, |
| "rewards/rejected": -1.2516977787017822, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.9097387173396676, |
| "grad_norm": 32.5, |
| "learning_rate": 6.180522565320665e-07, |
| "logits/chosen": 0.08530572056770325, |
| "logits/rejected": 0.15016531944274902, |
| "logps/chosen": -41.49757385253906, |
| "logps/rejected": -67.6200942993164, |
| "loss": 0.3317, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": -0.38793495297431946, |
| "rewards/margins": 1.099698543548584, |
| "rewards/rejected": -1.487633466720581, |
| "step": 3216 |
| }, |
| { |
| "epoch": 1.9192399049881235, |
| "grad_norm": 61.0, |
| "learning_rate": 6.161520190023752e-07, |
| "logits/chosen": 0.029300598427653313, |
| "logits/rejected": 0.13747388124465942, |
| "logps/chosen": -41.173736572265625, |
| "logps/rejected": -63.320472717285156, |
| "loss": 0.4127, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.4380740225315094, |
| "rewards/margins": 0.867578387260437, |
| "rewards/rejected": -1.3056524991989136, |
| "step": 3232 |
| }, |
| { |
| "epoch": 1.9287410926365796, |
| "grad_norm": 44.75, |
| "learning_rate": 6.142517814726841e-07, |
| "logits/chosen": 0.08258551359176636, |
| "logits/rejected": 0.16131961345672607, |
| "logps/chosen": -42.77522277832031, |
| "logps/rejected": -64.70687866210938, |
| "loss": 0.395, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.4444977045059204, |
| "rewards/margins": 0.8981250524520874, |
| "rewards/rejected": -1.3426228761672974, |
| "step": 3248 |
| }, |
| { |
| "epoch": 1.9382422802850356, |
| "grad_norm": 67.5, |
| "learning_rate": 6.123515439429928e-07, |
| "logits/chosen": 0.044392723590135574, |
| "logits/rejected": 0.17256534099578857, |
| "logps/chosen": -40.32421875, |
| "logps/rejected": -63.82672119140625, |
| "loss": 0.3611, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.40335986018180847, |
| "rewards/margins": 1.0055795907974243, |
| "rewards/rejected": -1.4089393615722656, |
| "step": 3264 |
| }, |
| { |
| "epoch": 1.9477434679334917, |
| "grad_norm": 49.25, |
| "learning_rate": 6.104513064133017e-07, |
| "logits/chosen": 0.1122560128569603, |
| "logits/rejected": 0.1721959263086319, |
| "logps/chosen": -41.03020477294922, |
| "logps/rejected": -62.24818420410156, |
| "loss": 0.4023, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.41969960927963257, |
| "rewards/margins": 0.8544268608093262, |
| "rewards/rejected": -1.2741265296936035, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.9572446555819476, |
| "grad_norm": 54.5, |
| "learning_rate": 6.085510688836104e-07, |
| "logits/chosen": 0.03646089881658554, |
| "logits/rejected": 0.1658436357975006, |
| "logps/chosen": -41.14552307128906, |
| "logps/rejected": -63.396366119384766, |
| "loss": 0.3697, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4058351218700409, |
| "rewards/margins": 0.9656636714935303, |
| "rewards/rejected": -1.3714988231658936, |
| "step": 3296 |
| }, |
| { |
| "epoch": 1.9667458432304037, |
| "grad_norm": 46.75, |
| "learning_rate": 6.066508313539193e-07, |
| "logits/chosen": 0.03148533031344414, |
| "logits/rejected": 0.14277218282222748, |
| "logps/chosen": -42.05448913574219, |
| "logps/rejected": -65.2642593383789, |
| "loss": 0.3928, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4272368550300598, |
| "rewards/margins": 0.8805699348449707, |
| "rewards/rejected": -1.3078068494796753, |
| "step": 3312 |
| }, |
| { |
| "epoch": 1.9762470308788598, |
| "grad_norm": 41.75, |
| "learning_rate": 6.04750593824228e-07, |
| "logits/chosen": 0.06896740198135376, |
| "logits/rejected": 0.10403262823820114, |
| "logps/chosen": -40.34957504272461, |
| "logps/rejected": -61.979915618896484, |
| "loss": 0.3788, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.3596211373806, |
| "rewards/margins": 0.9470376968383789, |
| "rewards/rejected": -1.3066588640213013, |
| "step": 3328 |
| }, |
| { |
| "epoch": 1.9857482185273159, |
| "grad_norm": 55.5, |
| "learning_rate": 6.028503562945369e-07, |
| "logits/chosen": -0.00018313713371753693, |
| "logits/rejected": 0.09493206441402435, |
| "logps/chosen": -40.674354553222656, |
| "logps/rejected": -62.300506591796875, |
| "loss": 0.3676, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.35612305998802185, |
| "rewards/margins": 0.9978376626968384, |
| "rewards/rejected": -1.3539607524871826, |
| "step": 3344 |
| }, |
| { |
| "epoch": 1.995249406175772, |
| "grad_norm": 59.75, |
| "learning_rate": 6.009501187648456e-07, |
| "logits/chosen": -0.004904988221824169, |
| "logits/rejected": 0.09877481311559677, |
| "logps/chosen": -40.2232666015625, |
| "logps/rejected": -61.96391296386719, |
| "loss": 0.3655, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3332652151584625, |
| "rewards/margins": 1.0086623430252075, |
| "rewards/rejected": -1.3419275283813477, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.004750593824228, |
| "grad_norm": 58.5, |
| "learning_rate": 5.990498812351543e-07, |
| "logits/chosen": 0.06698215007781982, |
| "logits/rejected": 0.17540045082569122, |
| "logps/chosen": -43.14054870605469, |
| "logps/rejected": -63.408565521240234, |
| "loss": 0.4218, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4646109938621521, |
| "rewards/margins": 0.7780359387397766, |
| "rewards/rejected": -1.2426469326019287, |
| "step": 3376 |
| }, |
| { |
| "epoch": 2.014251781472684, |
| "grad_norm": 44.25, |
| "learning_rate": 5.971496437054632e-07, |
| "logits/chosen": 0.09281051158905029, |
| "logits/rejected": 0.1449931114912033, |
| "logps/chosen": -41.34113311767578, |
| "logps/rejected": -64.54020690917969, |
| "loss": 0.3712, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.37270694971084595, |
| "rewards/margins": 0.932529628276825, |
| "rewards/rejected": -1.305236577987671, |
| "step": 3392 |
| }, |
| { |
| "epoch": 2.02375296912114, |
| "grad_norm": 43.75, |
| "learning_rate": 5.952494061757719e-07, |
| "logits/chosen": 0.03906689211726189, |
| "logits/rejected": 0.14161323010921478, |
| "logps/chosen": -39.38663101196289, |
| "logps/rejected": -63.778724670410156, |
| "loss": 0.3588, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3333834707736969, |
| "rewards/margins": 1.012413501739502, |
| "rewards/rejected": -1.3457969427108765, |
| "step": 3408 |
| }, |
| { |
| "epoch": 2.0332541567695963, |
| "grad_norm": 35.5, |
| "learning_rate": 5.933491686460808e-07, |
| "logits/chosen": 0.009185846894979477, |
| "logits/rejected": 0.027248330414295197, |
| "logps/chosen": -40.65418243408203, |
| "logps/rejected": -59.860408782958984, |
| "loss": 0.391, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.4065134525299072, |
| "rewards/margins": 0.879849910736084, |
| "rewards/rejected": -1.2863633632659912, |
| "step": 3424 |
| }, |
| { |
| "epoch": 2.0427553444180524, |
| "grad_norm": 50.25, |
| "learning_rate": 5.914489311163895e-07, |
| "logits/chosen": -0.04159889370203018, |
| "logits/rejected": 0.12180589139461517, |
| "logps/chosen": -40.518218994140625, |
| "logps/rejected": -63.04229736328125, |
| "loss": 0.3689, |
| "rewards/accuracies": 0.9921875, |
| "rewards/chosen": -0.39485669136047363, |
| "rewards/margins": 0.9171349406242371, |
| "rewards/rejected": -1.311991572380066, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.0522565320665085, |
| "grad_norm": 47.0, |
| "learning_rate": 5.895486935866984e-07, |
| "logits/chosen": 0.01238052174448967, |
| "logits/rejected": 0.09721352905035019, |
| "logps/chosen": -42.85576248168945, |
| "logps/rejected": -65.13483428955078, |
| "loss": 0.39, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.43811485171318054, |
| "rewards/margins": 0.9170427918434143, |
| "rewards/rejected": -1.3551576137542725, |
| "step": 3456 |
| }, |
| { |
| "epoch": 2.0617577197149646, |
| "grad_norm": 43.0, |
| "learning_rate": 5.876484560570071e-07, |
| "logits/chosen": 0.00015027448534965515, |
| "logits/rejected": 0.10659853368997574, |
| "logps/chosen": -40.47695541381836, |
| "logps/rejected": -62.642822265625, |
| "loss": 0.3501, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.37582921981811523, |
| "rewards/margins": 1.0362910032272339, |
| "rewards/rejected": -1.4121201038360596, |
| "step": 3472 |
| }, |
| { |
| "epoch": 2.07125890736342, |
| "grad_norm": 44.0, |
| "learning_rate": 5.85748218527316e-07, |
| "logits/chosen": -0.007269053254276514, |
| "logits/rejected": 0.11244423687458038, |
| "logps/chosen": -40.462440490722656, |
| "logps/rejected": -65.11763000488281, |
| "loss": 0.3694, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.3953917920589447, |
| "rewards/margins": 1.003770351409912, |
| "rewards/rejected": -1.3991621732711792, |
| "step": 3488 |
| }, |
| { |
| "epoch": 2.0807600950118763, |
| "grad_norm": 40.25, |
| "learning_rate": 5.838479809976247e-07, |
| "logits/chosen": 0.007935550063848495, |
| "logits/rejected": 0.1055232435464859, |
| "logps/chosen": -39.9696159362793, |
| "logps/rejected": -65.34225463867188, |
| "loss": 0.3638, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.38794174790382385, |
| "rewards/margins": 0.9854438304901123, |
| "rewards/rejected": -1.3733854293823242, |
| "step": 3504 |
| }, |
| { |
| "epoch": 2.0902612826603324, |
| "grad_norm": 49.0, |
| "learning_rate": 5.819477434679335e-07, |
| "logits/chosen": -0.002856359351426363, |
| "logits/rejected": 0.12148334830999374, |
| "logps/chosen": -40.93573760986328, |
| "logps/rejected": -62.00894546508789, |
| "loss": 0.4115, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.42675450444221497, |
| "rewards/margins": 0.8534324765205383, |
| "rewards/rejected": -1.2801870107650757, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.0997624703087885, |
| "grad_norm": 52.75, |
| "learning_rate": 5.800475059382422e-07, |
| "logits/chosen": 0.019883442670106888, |
| "logits/rejected": 0.12564308941364288, |
| "logps/chosen": -42.050662994384766, |
| "logps/rejected": -61.40731430053711, |
| "loss": 0.3947, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.45309561491012573, |
| "rewards/margins": 0.8529905080795288, |
| "rewards/rejected": -1.3060861825942993, |
| "step": 3536 |
| }, |
| { |
| "epoch": 2.1092636579572446, |
| "grad_norm": 44.25, |
| "learning_rate": 5.78147268408551e-07, |
| "logits/chosen": -0.0019861001055687666, |
| "logits/rejected": 0.13409556448459625, |
| "logps/chosen": -42.92340850830078, |
| "logps/rejected": -66.3511734008789, |
| "loss": 0.3933, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.43070507049560547, |
| "rewards/margins": 0.907360315322876, |
| "rewards/rejected": -1.3380653858184814, |
| "step": 3552 |
| }, |
| { |
| "epoch": 2.1187648456057007, |
| "grad_norm": 48.25, |
| "learning_rate": 5.762470308788598e-07, |
| "logits/chosen": 0.13548046350479126, |
| "logits/rejected": 0.11156810820102692, |
| "logps/chosen": -41.66028594970703, |
| "logps/rejected": -62.116031646728516, |
| "loss": 0.3611, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.3860835134983063, |
| "rewards/margins": 0.9740406274795532, |
| "rewards/rejected": -1.360124111175537, |
| "step": 3568 |
| }, |
| { |
| "epoch": 2.1282660332541568, |
| "grad_norm": 49.25, |
| "learning_rate": 5.743467933491685e-07, |
| "logits/chosen": 0.022612586617469788, |
| "logits/rejected": 0.13782542943954468, |
| "logps/chosen": -39.91732406616211, |
| "logps/rejected": -64.74801635742188, |
| "loss": 0.3778, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.38275399804115295, |
| "rewards/margins": 0.9404458999633789, |
| "rewards/rejected": -1.3231998682022095, |
| "step": 3584 |
| }, |
| { |
| "epoch": 2.137767220902613, |
| "grad_norm": 46.5, |
| "learning_rate": 5.724465558194774e-07, |
| "logits/chosen": 0.06513424217700958, |
| "logits/rejected": 0.1354922503232956, |
| "logps/chosen": -40.324771881103516, |
| "logps/rejected": -62.070045471191406, |
| "loss": 0.3937, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4051070213317871, |
| "rewards/margins": 0.8988076448440552, |
| "rewards/rejected": -1.3039146661758423, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.147268408551069, |
| "grad_norm": 53.75, |
| "learning_rate": 5.705463182897861e-07, |
| "logits/chosen": 0.09106743335723877, |
| "logits/rejected": 0.22678810358047485, |
| "logps/chosen": -44.392295837402344, |
| "logps/rejected": -65.7562255859375, |
| "loss": 0.3996, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.5041301846504211, |
| "rewards/margins": 0.863517701625824, |
| "rewards/rejected": -1.3676478862762451, |
| "step": 3616 |
| }, |
| { |
| "epoch": 2.156769596199525, |
| "grad_norm": 49.5, |
| "learning_rate": 5.68646080760095e-07, |
| "logits/chosen": 0.045438431203365326, |
| "logits/rejected": 0.09103736281394958, |
| "logps/chosen": -38.36294174194336, |
| "logps/rejected": -58.81281661987305, |
| "loss": 0.3819, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.3600079417228699, |
| "rewards/margins": 0.9226837754249573, |
| "rewards/rejected": -1.2826919555664062, |
| "step": 3632 |
| }, |
| { |
| "epoch": 2.166270783847981, |
| "grad_norm": 46.0, |
| "learning_rate": 5.667458432304037e-07, |
| "logits/chosen": 0.13296620547771454, |
| "logits/rejected": 0.18886858224868774, |
| "logps/chosen": -40.72220230102539, |
| "logps/rejected": -64.00652313232422, |
| "loss": 0.3879, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.399859756231308, |
| "rewards/margins": 0.9217446446418762, |
| "rewards/rejected": -1.3216043710708618, |
| "step": 3648 |
| }, |
| { |
| "epoch": 2.175771971496437, |
| "grad_norm": 45.5, |
| "learning_rate": 5.648456057007126e-07, |
| "logits/chosen": 0.02620471641421318, |
| "logits/rejected": 0.11671482771635056, |
| "logps/chosen": -40.500267028808594, |
| "logps/rejected": -62.4256591796875, |
| "loss": 0.3847, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.39798420667648315, |
| "rewards/margins": 0.910751223564148, |
| "rewards/rejected": -1.3087353706359863, |
| "step": 3664 |
| }, |
| { |
| "epoch": 2.1852731591448933, |
| "grad_norm": 38.5, |
| "learning_rate": 5.629453681710213e-07, |
| "logits/chosen": 0.0530809611082077, |
| "logits/rejected": 0.16400957107543945, |
| "logps/chosen": -42.896827697753906, |
| "logps/rejected": -63.195579528808594, |
| "loss": 0.4002, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.394864559173584, |
| "rewards/margins": 0.8955377340316772, |
| "rewards/rejected": -1.2904024124145508, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.1947743467933494, |
| "grad_norm": 50.5, |
| "learning_rate": 5.610451306413302e-07, |
| "logits/chosen": 0.040826160460710526, |
| "logits/rejected": 0.10787297785282135, |
| "logps/chosen": -40.30386734008789, |
| "logps/rejected": -64.05851745605469, |
| "loss": 0.3583, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3300076425075531, |
| "rewards/margins": 1.0026148557662964, |
| "rewards/rejected": -1.3326225280761719, |
| "step": 3696 |
| }, |
| { |
| "epoch": 2.204275534441805, |
| "grad_norm": 58.0, |
| "learning_rate": 5.591448931116389e-07, |
| "logits/chosen": 0.06701106578111649, |
| "logits/rejected": 0.13853205740451813, |
| "logps/chosen": -42.53071594238281, |
| "logps/rejected": -62.566688537597656, |
| "loss": 0.3845, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.41142404079437256, |
| "rewards/margins": 0.9055157899856567, |
| "rewards/rejected": -1.3169398307800293, |
| "step": 3712 |
| }, |
| { |
| "epoch": 2.213776722090261, |
| "grad_norm": 57.0, |
| "learning_rate": 5.572446555819477e-07, |
| "logits/chosen": 0.01710950955748558, |
| "logits/rejected": 0.095205157995224, |
| "logps/chosen": -40.496944427490234, |
| "logps/rejected": -61.76165771484375, |
| "loss": 0.3853, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.3820299506187439, |
| "rewards/margins": 0.9107472896575928, |
| "rewards/rejected": -1.292777180671692, |
| "step": 3728 |
| }, |
| { |
| "epoch": 2.223277909738717, |
| "grad_norm": 56.0, |
| "learning_rate": 5.553444180522565e-07, |
| "logits/chosen": 0.08512625843286514, |
| "logits/rejected": 0.16064119338989258, |
| "logps/chosen": -41.3702278137207, |
| "logps/rejected": -66.45726013183594, |
| "loss": 0.3534, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3733311891555786, |
| "rewards/margins": 1.0442577600479126, |
| "rewards/rejected": -1.4175888299942017, |
| "step": 3744 |
| }, |
| { |
| "epoch": 2.2327790973871733, |
| "grad_norm": 60.75, |
| "learning_rate": 5.534441805225653e-07, |
| "logits/chosen": 0.09103821218013763, |
| "logits/rejected": 0.13160774111747742, |
| "logps/chosen": -43.8742790222168, |
| "logps/rejected": -65.8563003540039, |
| "loss": 0.3973, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.4512379467487335, |
| "rewards/margins": 0.9139763712882996, |
| "rewards/rejected": -1.365214228630066, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.2422802850356294, |
| "grad_norm": 44.75, |
| "learning_rate": 5.515439429928741e-07, |
| "logits/chosen": -0.0005833394825458527, |
| "logits/rejected": 0.10005127638578415, |
| "logps/chosen": -42.24885559082031, |
| "logps/rejected": -64.85872650146484, |
| "loss": 0.3583, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": -0.42689433693885803, |
| "rewards/margins": 0.9682207703590393, |
| "rewards/rejected": -1.3951151371002197, |
| "step": 3776 |
| }, |
| { |
| "epoch": 2.2517814726840855, |
| "grad_norm": 53.5, |
| "learning_rate": 5.496437054631829e-07, |
| "logits/chosen": 0.05875023454427719, |
| "logits/rejected": 0.13941840827465057, |
| "logps/chosen": -42.663665771484375, |
| "logps/rejected": -63.30080795288086, |
| "loss": 0.3864, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.4756395220756531, |
| "rewards/margins": 0.941820502281189, |
| "rewards/rejected": -1.4174600839614868, |
| "step": 3792 |
| }, |
| { |
| "epoch": 2.2612826603325415, |
| "grad_norm": 39.25, |
| "learning_rate": 5.477434679334917e-07, |
| "logits/chosen": 0.02674500085413456, |
| "logits/rejected": 0.17680123448371887, |
| "logps/chosen": -38.55767822265625, |
| "logps/rejected": -62.6890754699707, |
| "loss": 0.3493, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": -0.2811494767665863, |
| "rewards/margins": 1.024692416191101, |
| "rewards/rejected": -1.3058419227600098, |
| "step": 3808 |
| }, |
| { |
| "epoch": 2.2707838479809976, |
| "grad_norm": 48.25, |
| "learning_rate": 5.458432304038004e-07, |
| "logits/chosen": 0.009942879900336266, |
| "logits/rejected": 0.1574607640504837, |
| "logps/chosen": -41.19435501098633, |
| "logps/rejected": -63.181053161621094, |
| "loss": 0.3878, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.42380547523498535, |
| "rewards/margins": 0.9084890484809875, |
| "rewards/rejected": -1.3322944641113281, |
| "step": 3824 |
| }, |
| { |
| "epoch": 2.2802850356294537, |
| "grad_norm": 49.25, |
| "learning_rate": 5.439429928741093e-07, |
| "logits/chosen": 0.06854081898927689, |
| "logits/rejected": 0.1759743094444275, |
| "logps/chosen": -40.84571075439453, |
| "logps/rejected": -64.14063262939453, |
| "loss": 0.4032, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.4499894678592682, |
| "rewards/margins": 0.8732954263687134, |
| "rewards/rejected": -1.3232848644256592, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.28978622327791, |
| "grad_norm": 58.5, |
| "learning_rate": 5.42042755344418e-07, |
| "logits/chosen": 0.019341815263032913, |
| "logits/rejected": 0.16274170577526093, |
| "logps/chosen": -41.712154388427734, |
| "logps/rejected": -63.17491149902344, |
| "loss": 0.3988, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.4367818534374237, |
| "rewards/margins": 0.8796077370643616, |
| "rewards/rejected": -1.3163896799087524, |
| "step": 3856 |
| }, |
| { |
| "epoch": 2.299287410926366, |
| "grad_norm": 45.5, |
| "learning_rate": 5.401425178147269e-07, |
| "logits/chosen": 0.0484287403523922, |
| "logits/rejected": 0.15023472905158997, |
| "logps/chosen": -40.65409851074219, |
| "logps/rejected": -64.70632934570312, |
| "loss": 0.3934, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.3949016332626343, |
| "rewards/margins": 0.9213113784790039, |
| "rewards/rejected": -1.3162128925323486, |
| "step": 3872 |
| }, |
| { |
| "epoch": 2.308788598574822, |
| "grad_norm": 45.5, |
| "learning_rate": 5.382422802850356e-07, |
| "logits/chosen": 0.0558081790804863, |
| "logits/rejected": 0.14007531106472015, |
| "logps/chosen": -40.67841339111328, |
| "logps/rejected": -63.90584945678711, |
| "loss": 0.3557, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.40134915709495544, |
| "rewards/margins": 0.9840033054351807, |
| "rewards/rejected": -1.385352373123169, |
| "step": 3888 |
| }, |
| { |
| "epoch": 2.318289786223278, |
| "grad_norm": 43.5, |
| "learning_rate": 5.363420427553445e-07, |
| "logits/chosen": 0.06617691367864609, |
| "logits/rejected": 0.13138173520565033, |
| "logps/chosen": -42.65940856933594, |
| "logps/rejected": -63.277469635009766, |
| "loss": 0.3987, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.452262282371521, |
| "rewards/margins": 0.8770920634269714, |
| "rewards/rejected": -1.3293542861938477, |
| "step": 3904 |
| }, |
| { |
| "epoch": 2.3277909738717337, |
| "grad_norm": 49.75, |
| "learning_rate": 5.344418052256532e-07, |
| "logits/chosen": 0.0712217167019844, |
| "logits/rejected": 0.11869163066148758, |
| "logps/chosen": -42.18745803833008, |
| "logps/rejected": -62.14987564086914, |
| "loss": 0.387, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.3666439354419708, |
| "rewards/margins": 0.9148606061935425, |
| "rewards/rejected": -1.281504511833191, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.33729216152019, |
| "grad_norm": 41.25, |
| "learning_rate": 5.32541567695962e-07, |
| "logits/chosen": 0.052989356219768524, |
| "logits/rejected": 0.1383398026227951, |
| "logps/chosen": -39.75597381591797, |
| "logps/rejected": -61.32164764404297, |
| "loss": 0.3859, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.3883766829967499, |
| "rewards/margins": 0.9118002653121948, |
| "rewards/rejected": -1.3001768589019775, |
| "step": 3936 |
| }, |
| { |
| "epoch": 2.346793349168646, |
| "grad_norm": 45.25, |
| "learning_rate": 5.306413301662708e-07, |
| "logits/chosen": 0.07647877931594849, |
| "logits/rejected": 0.12356515228748322, |
| "logps/chosen": -41.482666015625, |
| "logps/rejected": -64.74012756347656, |
| "loss": 0.4029, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.4555918574333191, |
| "rewards/margins": 0.8827881217002869, |
| "rewards/rejected": -1.338379979133606, |
| "step": 3952 |
| }, |
| { |
| "epoch": 2.356294536817102, |
| "grad_norm": 49.0, |
| "learning_rate": 5.287410926365796e-07, |
| "logits/chosen": 0.025606969371438026, |
| "logits/rejected": 0.11031323671340942, |
| "logps/chosen": -40.05935287475586, |
| "logps/rejected": -61.486900329589844, |
| "loss": 0.367, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.34517067670822144, |
| "rewards/margins": 1.008548378944397, |
| "rewards/rejected": -1.3537191152572632, |
| "step": 3968 |
| }, |
| { |
| "epoch": 2.365795724465558, |
| "grad_norm": 52.75, |
| "learning_rate": 5.268408551068883e-07, |
| "logits/chosen": 0.020188216120004654, |
| "logits/rejected": 0.07814847677946091, |
| "logps/chosen": -41.763946533203125, |
| "logps/rejected": -62.15244674682617, |
| "loss": 0.3841, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4187239706516266, |
| "rewards/margins": 0.9112571477890015, |
| "rewards/rejected": -1.3299810886383057, |
| "step": 3984 |
| }, |
| { |
| "epoch": 2.375296912114014, |
| "grad_norm": 53.5, |
| "learning_rate": 5.24940617577197e-07, |
| "logits/chosen": -0.013901928439736366, |
| "logits/rejected": 0.1482914239168167, |
| "logps/chosen": -40.2778205871582, |
| "logps/rejected": -64.91542053222656, |
| "loss": 0.3821, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.42066463828086853, |
| "rewards/margins": 0.9068177938461304, |
| "rewards/rejected": -1.3274823427200317, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.3847980997624703, |
| "grad_norm": 60.75, |
| "learning_rate": 5.230403800475059e-07, |
| "logits/chosen": 0.07787059247493744, |
| "logits/rejected": 0.17709147930145264, |
| "logps/chosen": -42.24334716796875, |
| "logps/rejected": -63.37432861328125, |
| "loss": 0.3924, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3965498208999634, |
| "rewards/margins": 0.8994001150131226, |
| "rewards/rejected": -1.295949935913086, |
| "step": 4016 |
| }, |
| { |
| "epoch": 2.3942992874109263, |
| "grad_norm": 47.25, |
| "learning_rate": 5.211401425178146e-07, |
| "logits/chosen": 0.06299006193876266, |
| "logits/rejected": 0.14556781947612762, |
| "logps/chosen": -42.120365142822266, |
| "logps/rejected": -64.56780242919922, |
| "loss": 0.3795, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4348159730434418, |
| "rewards/margins": 0.9128706455230713, |
| "rewards/rejected": -1.3476866483688354, |
| "step": 4032 |
| }, |
| { |
| "epoch": 2.4038004750593824, |
| "grad_norm": 44.25, |
| "learning_rate": 5.192399049881235e-07, |
| "logits/chosen": 0.086119145154953, |
| "logits/rejected": 0.1857333481311798, |
| "logps/chosen": -40.12603759765625, |
| "logps/rejected": -61.00786590576172, |
| "loss": 0.4011, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.41915708780288696, |
| "rewards/margins": 0.8710625767707825, |
| "rewards/rejected": -1.2902196645736694, |
| "step": 4048 |
| }, |
| { |
| "epoch": 2.4133016627078385, |
| "grad_norm": 37.75, |
| "learning_rate": 5.173396674584322e-07, |
| "logits/chosen": 0.027670690789818764, |
| "logits/rejected": 0.1215248703956604, |
| "logps/chosen": -40.98219299316406, |
| "logps/rejected": -62.411865234375, |
| "loss": 0.3782, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.3689427971839905, |
| "rewards/margins": 0.9613568186759949, |
| "rewards/rejected": -1.3302994966506958, |
| "step": 4064 |
| }, |
| { |
| "epoch": 2.4228028503562946, |
| "grad_norm": 45.5, |
| "learning_rate": 5.154394299287411e-07, |
| "logits/chosen": 0.03985697776079178, |
| "logits/rejected": 0.12585824728012085, |
| "logps/chosen": -42.074466705322266, |
| "logps/rejected": -64.21247100830078, |
| "loss": 0.3755, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3887619376182556, |
| "rewards/margins": 0.9904804229736328, |
| "rewards/rejected": -1.3792424201965332, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.4323040380047507, |
| "grad_norm": 43.75, |
| "learning_rate": 5.135391923990498e-07, |
| "logits/chosen": 0.05401856452226639, |
| "logits/rejected": 0.10051050782203674, |
| "logps/chosen": -41.186561584472656, |
| "logps/rejected": -63.16551971435547, |
| "loss": 0.4086, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.4640354812145233, |
| "rewards/margins": 0.8580038547515869, |
| "rewards/rejected": -1.3220393657684326, |
| "step": 4096 |
| }, |
| { |
| "epoch": 2.441805225653207, |
| "grad_norm": 48.25, |
| "learning_rate": 5.116389548693586e-07, |
| "logits/chosen": 0.06853917986154556, |
| "logits/rejected": 0.1522480845451355, |
| "logps/chosen": -41.95103073120117, |
| "logps/rejected": -64.08912658691406, |
| "loss": 0.4026, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4834258258342743, |
| "rewards/margins": 0.8559648394584656, |
| "rewards/rejected": -1.3393906354904175, |
| "step": 4112 |
| }, |
| { |
| "epoch": 2.451306413301663, |
| "grad_norm": 41.5, |
| "learning_rate": 5.097387173396674e-07, |
| "logits/chosen": 0.07197491824626923, |
| "logits/rejected": 0.16541120409965515, |
| "logps/chosen": -42.12483215332031, |
| "logps/rejected": -66.00977325439453, |
| "loss": 0.3502, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": -0.4519379436969757, |
| "rewards/margins": 1.0152369737625122, |
| "rewards/rejected": -1.467175006866455, |
| "step": 4128 |
| }, |
| { |
| "epoch": 2.460807600950119, |
| "grad_norm": 41.0, |
| "learning_rate": 5.078384798099762e-07, |
| "logits/chosen": 0.048155199736356735, |
| "logits/rejected": 0.14590026438236237, |
| "logps/chosen": -41.200191497802734, |
| "logps/rejected": -63.48173522949219, |
| "loss": 0.3958, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.46383845806121826, |
| "rewards/margins": 0.8997552394866943, |
| "rewards/rejected": -1.3635936975479126, |
| "step": 4144 |
| }, |
| { |
| "epoch": 2.470308788598575, |
| "grad_norm": 44.0, |
| "learning_rate": 5.05938242280285e-07, |
| "logits/chosen": 0.06484143435955048, |
| "logits/rejected": 0.13990077376365662, |
| "logps/chosen": -41.676612854003906, |
| "logps/rejected": -66.45744323730469, |
| "loss": 0.3469, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.404636025428772, |
| "rewards/margins": 1.0517361164093018, |
| "rewards/rejected": -1.4563721418380737, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.4798099762470307, |
| "grad_norm": 56.0, |
| "learning_rate": 5.040380047505938e-07, |
| "logits/chosen": 0.10340757668018341, |
| "logits/rejected": 0.12481331825256348, |
| "logps/chosen": -41.96209716796875, |
| "logps/rejected": -66.62129211425781, |
| "loss": 0.3694, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.4350067377090454, |
| "rewards/margins": 0.967208981513977, |
| "rewards/rejected": -1.4022157192230225, |
| "step": 4176 |
| }, |
| { |
| "epoch": 2.489311163895487, |
| "grad_norm": 49.5, |
| "learning_rate": 5.021377672209026e-07, |
| "logits/chosen": 0.07601352035999298, |
| "logits/rejected": 0.17555804550647736, |
| "logps/chosen": -41.83412170410156, |
| "logps/rejected": -62.92101287841797, |
| "loss": 0.385, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4230985641479492, |
| "rewards/margins": 0.9050178527832031, |
| "rewards/rejected": -1.3281164169311523, |
| "step": 4192 |
| }, |
| { |
| "epoch": 2.498812351543943, |
| "grad_norm": 44.5, |
| "learning_rate": 5.002375296912114e-07, |
| "logits/chosen": 0.03947605937719345, |
| "logits/rejected": 0.14804242551326752, |
| "logps/chosen": -42.070068359375, |
| "logps/rejected": -67.8903579711914, |
| "loss": 0.3496, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3808472752571106, |
| "rewards/margins": 1.0300160646438599, |
| "rewards/rejected": -1.4108633995056152, |
| "step": 4208 |
| }, |
| { |
| "epoch": 2.508313539192399, |
| "grad_norm": 45.25, |
| "learning_rate": 4.983372921615201e-07, |
| "logits/chosen": 0.05546602979302406, |
| "logits/rejected": 0.106099434196949, |
| "logps/chosen": -39.989471435546875, |
| "logps/rejected": -63.233856201171875, |
| "loss": 0.3484, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.33619987964630127, |
| "rewards/margins": 1.0194822549819946, |
| "rewards/rejected": -1.3556820154190063, |
| "step": 4224 |
| }, |
| { |
| "epoch": 2.517814726840855, |
| "grad_norm": 52.0, |
| "learning_rate": 4.96437054631829e-07, |
| "logits/chosen": 0.026712900027632713, |
| "logits/rejected": 0.09488269686698914, |
| "logps/chosen": -41.7761344909668, |
| "logps/rejected": -60.326847076416016, |
| "loss": 0.3839, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.4112342596054077, |
| "rewards/margins": 0.9102669358253479, |
| "rewards/rejected": -1.3215012550354004, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.527315914489311, |
| "grad_norm": 47.5, |
| "learning_rate": 4.945368171021377e-07, |
| "logits/chosen": 0.050753381103277206, |
| "logits/rejected": 0.10667050629854202, |
| "logps/chosen": -42.3249626159668, |
| "logps/rejected": -64.19955444335938, |
| "loss": 0.4062, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.46207451820373535, |
| "rewards/margins": 0.864716649055481, |
| "rewards/rejected": -1.3267912864685059, |
| "step": 4256 |
| }, |
| { |
| "epoch": 2.5368171021377672, |
| "grad_norm": 47.25, |
| "learning_rate": 4.926365795724465e-07, |
| "logits/chosen": 0.007675782777369022, |
| "logits/rejected": 0.06300470232963562, |
| "logps/chosen": -42.04328918457031, |
| "logps/rejected": -61.39952850341797, |
| "loss": 0.3991, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.45607635378837585, |
| "rewards/margins": 0.8779357671737671, |
| "rewards/rejected": -1.3340120315551758, |
| "step": 4272 |
| }, |
| { |
| "epoch": 2.5463182897862233, |
| "grad_norm": 52.75, |
| "learning_rate": 4.907363420427553e-07, |
| "logits/chosen": 0.08944439142942429, |
| "logits/rejected": 0.16112002730369568, |
| "logps/chosen": -42.05665588378906, |
| "logps/rejected": -65.32984924316406, |
| "loss": 0.3578, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.43149158358573914, |
| "rewards/margins": 0.9926659464836121, |
| "rewards/rejected": -1.4241576194763184, |
| "step": 4288 |
| }, |
| { |
| "epoch": 2.5558194774346794, |
| "grad_norm": 43.75, |
| "learning_rate": 4.888361045130641e-07, |
| "logits/chosen": 0.048501964658498764, |
| "logits/rejected": 0.11412826180458069, |
| "logps/chosen": -40.358360290527344, |
| "logps/rejected": -62.57841110229492, |
| "loss": 0.3745, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3942060172557831, |
| "rewards/margins": 0.9297899007797241, |
| "rewards/rejected": -1.32399582862854, |
| "step": 4304 |
| }, |
| { |
| "epoch": 2.5653206650831355, |
| "grad_norm": 62.5, |
| "learning_rate": 4.869358669833729e-07, |
| "logits/chosen": 0.048068106174468994, |
| "logits/rejected": 0.11154348403215408, |
| "logps/chosen": -42.39956283569336, |
| "logps/rejected": -64.91427612304688, |
| "loss": 0.3943, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.42655476927757263, |
| "rewards/margins": 0.9072946310043335, |
| "rewards/rejected": -1.3338494300842285, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.5748218527315916, |
| "grad_norm": 44.25, |
| "learning_rate": 4.850356294536817e-07, |
| "logits/chosen": 0.0894806981086731, |
| "logits/rejected": 0.12811443209648132, |
| "logps/chosen": -42.88471603393555, |
| "logps/rejected": -63.97032165527344, |
| "loss": 0.3795, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.4201812446117401, |
| "rewards/margins": 0.9957151412963867, |
| "rewards/rejected": -1.4158962965011597, |
| "step": 4336 |
| }, |
| { |
| "epoch": 2.5843230403800472, |
| "grad_norm": 47.25, |
| "learning_rate": 4.831353919239905e-07, |
| "logits/chosen": 0.11298425495624542, |
| "logits/rejected": 0.12845686078071594, |
| "logps/chosen": -42.59984588623047, |
| "logps/rejected": -62.72551727294922, |
| "loss": 0.4004, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.4689531922340393, |
| "rewards/margins": 0.8442877531051636, |
| "rewards/rejected": -1.3132410049438477, |
| "step": 4352 |
| }, |
| { |
| "epoch": 2.5938242280285033, |
| "grad_norm": 41.5, |
| "learning_rate": 4.812351543942993e-07, |
| "logits/chosen": 0.03180324286222458, |
| "logits/rejected": 0.11583473533391953, |
| "logps/chosen": -41.20764923095703, |
| "logps/rejected": -62.94646453857422, |
| "loss": 0.3847, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.42716217041015625, |
| "rewards/margins": 0.8983253240585327, |
| "rewards/rejected": -1.3254876136779785, |
| "step": 4368 |
| }, |
| { |
| "epoch": 2.6033254156769594, |
| "grad_norm": 53.5, |
| "learning_rate": 4.793349168646081e-07, |
| "logits/chosen": 0.09349140524864197, |
| "logits/rejected": 0.1733095794916153, |
| "logps/chosen": -42.4179573059082, |
| "logps/rejected": -64.96019744873047, |
| "loss": 0.3653, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.4492948353290558, |
| "rewards/margins": 0.9919961094856262, |
| "rewards/rejected": -1.4412909746170044, |
| "step": 4384 |
| }, |
| { |
| "epoch": 2.6128266033254155, |
| "grad_norm": 42.5, |
| "learning_rate": 4.774346793349168e-07, |
| "logits/chosen": -0.035068899393081665, |
| "logits/rejected": 0.11121690273284912, |
| "logps/chosen": -38.56016540527344, |
| "logps/rejected": -62.15085220336914, |
| "loss": 0.3403, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.33855387568473816, |
| "rewards/margins": 1.0478910207748413, |
| "rewards/rejected": -1.3864448070526123, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.6223277909738716, |
| "grad_norm": 38.75, |
| "learning_rate": 4.755344418052256e-07, |
| "logits/chosen": 0.08347325026988983, |
| "logits/rejected": 0.181388258934021, |
| "logps/chosen": -42.476497650146484, |
| "logps/rejected": -65.1061019897461, |
| "loss": 0.3708, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.4493361711502075, |
| "rewards/margins": 0.9572871327400208, |
| "rewards/rejected": -1.4066232442855835, |
| "step": 4416 |
| }, |
| { |
| "epoch": 2.6318289786223277, |
| "grad_norm": 34.25, |
| "learning_rate": 4.736342042755344e-07, |
| "logits/chosen": -0.0012733405455946922, |
| "logits/rejected": 0.10057978332042694, |
| "logps/chosen": -40.456119537353516, |
| "logps/rejected": -63.483036041259766, |
| "loss": 0.3623, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -0.35637179017066956, |
| "rewards/margins": 0.99317467212677, |
| "rewards/rejected": -1.3495464324951172, |
| "step": 4432 |
| }, |
| { |
| "epoch": 2.6413301662707838, |
| "grad_norm": 60.0, |
| "learning_rate": 4.717339667458432e-07, |
| "logits/chosen": 0.04187817499041557, |
| "logits/rejected": 0.11814681440591812, |
| "logps/chosen": -42.3458251953125, |
| "logps/rejected": -61.30575180053711, |
| "loss": 0.4308, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.46507805585861206, |
| "rewards/margins": 0.7858736515045166, |
| "rewards/rejected": -1.2509517669677734, |
| "step": 4448 |
| }, |
| { |
| "epoch": 2.65083135391924, |
| "grad_norm": 43.25, |
| "learning_rate": 4.69833729216152e-07, |
| "logits/chosen": 0.02010425180196762, |
| "logits/rejected": 0.12688446044921875, |
| "logps/chosen": -41.3795051574707, |
| "logps/rejected": -62.74733352661133, |
| "loss": 0.3521, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.40290382504463196, |
| "rewards/margins": 0.9980502128601074, |
| "rewards/rejected": -1.4009541273117065, |
| "step": 4464 |
| }, |
| { |
| "epoch": 2.660332541567696, |
| "grad_norm": 49.75, |
| "learning_rate": 4.679334916864608e-07, |
| "logits/chosen": 0.04653356224298477, |
| "logits/rejected": 0.1141427606344223, |
| "logps/chosen": -42.21457290649414, |
| "logps/rejected": -63.52908706665039, |
| "loss": 0.3789, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.39218419790267944, |
| "rewards/margins": 0.9429512023925781, |
| "rewards/rejected": -1.3351353406906128, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.669833729216152, |
| "grad_norm": 49.0, |
| "learning_rate": 4.660332541567696e-07, |
| "logits/chosen": 0.03440314531326294, |
| "logits/rejected": 0.06475966423749924, |
| "logps/chosen": -40.21307373046875, |
| "logps/rejected": -61.209136962890625, |
| "loss": 0.3654, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.36982235312461853, |
| "rewards/margins": 0.9860597252845764, |
| "rewards/rejected": -1.355882167816162, |
| "step": 4496 |
| }, |
| { |
| "epoch": 2.679334916864608, |
| "grad_norm": 40.75, |
| "learning_rate": 4.641330166270784e-07, |
| "logits/chosen": 0.05200592428445816, |
| "logits/rejected": 0.09623152762651443, |
| "logps/chosen": -40.480892181396484, |
| "logps/rejected": -61.244468688964844, |
| "loss": 0.3711, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.32693397998809814, |
| "rewards/margins": 0.9501146078109741, |
| "rewards/rejected": -1.2770487070083618, |
| "step": 4512 |
| }, |
| { |
| "epoch": 2.688836104513064, |
| "grad_norm": 44.0, |
| "learning_rate": 4.622327790973872e-07, |
| "logits/chosen": 0.011258577927947044, |
| "logits/rejected": 0.15000179409980774, |
| "logps/chosen": -42.406429290771484, |
| "logps/rejected": -67.35293579101562, |
| "loss": 0.3564, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.42154744267463684, |
| "rewards/margins": 1.0057048797607422, |
| "rewards/rejected": -1.427252173423767, |
| "step": 4528 |
| }, |
| { |
| "epoch": 2.6983372921615203, |
| "grad_norm": 42.25, |
| "learning_rate": 4.60332541567696e-07, |
| "logits/chosen": 0.01903606206178665, |
| "logits/rejected": 0.128606379032135, |
| "logps/chosen": -38.58045959472656, |
| "logps/rejected": -61.52817916870117, |
| "loss": 0.3749, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3256430923938751, |
| "rewards/margins": 0.9467271566390991, |
| "rewards/rejected": -1.2723702192306519, |
| "step": 4544 |
| }, |
| { |
| "epoch": 2.7078384798099764, |
| "grad_norm": 62.25, |
| "learning_rate": 4.584323040380047e-07, |
| "logits/chosen": 0.03335714340209961, |
| "logits/rejected": 0.16635264456272125, |
| "logps/chosen": -39.833648681640625, |
| "logps/rejected": -61.806251525878906, |
| "loss": 0.3795, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3701300024986267, |
| "rewards/margins": 0.9247270226478577, |
| "rewards/rejected": -1.2948570251464844, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.7173396674584325, |
| "grad_norm": 34.0, |
| "learning_rate": 4.565320665083135e-07, |
| "logits/chosen": 0.011373243294656277, |
| "logits/rejected": 0.1352601945400238, |
| "logps/chosen": -41.90055465698242, |
| "logps/rejected": -65.95397186279297, |
| "loss": 0.372, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.4336491525173187, |
| "rewards/margins": 0.9594928026199341, |
| "rewards/rejected": -1.3931418657302856, |
| "step": 4576 |
| }, |
| { |
| "epoch": 2.7268408551068886, |
| "grad_norm": 40.75, |
| "learning_rate": 4.5463182897862227e-07, |
| "logits/chosen": 0.03081374615430832, |
| "logits/rejected": 0.1375807374715805, |
| "logps/chosen": -40.30317687988281, |
| "logps/rejected": -62.725467681884766, |
| "loss": 0.3491, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.33172154426574707, |
| "rewards/margins": 1.030013918876648, |
| "rewards/rejected": -1.3617355823516846, |
| "step": 4592 |
| }, |
| { |
| "epoch": 2.7363420427553447, |
| "grad_norm": 53.5, |
| "learning_rate": 4.5273159144893107e-07, |
| "logits/chosen": 0.002102881669998169, |
| "logits/rejected": 0.05711708217859268, |
| "logps/chosen": -39.306236267089844, |
| "logps/rejected": -62.230323791503906, |
| "loss": 0.3496, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.3352733254432678, |
| "rewards/margins": 1.0495156049728394, |
| "rewards/rejected": -1.3847888708114624, |
| "step": 4608 |
| }, |
| { |
| "epoch": 2.7458432304038007, |
| "grad_norm": 54.0, |
| "learning_rate": 4.5083135391923986e-07, |
| "logits/chosen": 0.02053050696849823, |
| "logits/rejected": 0.12392740696668625, |
| "logps/chosen": -41.42273712158203, |
| "logps/rejected": -63.603023529052734, |
| "loss": 0.3822, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4146535396575928, |
| "rewards/margins": 0.9132061004638672, |
| "rewards/rejected": -1.32785964012146, |
| "step": 4624 |
| }, |
| { |
| "epoch": 2.7553444180522564, |
| "grad_norm": 38.5, |
| "learning_rate": 4.4893111638954866e-07, |
| "logits/chosen": 0.0008830556180328131, |
| "logits/rejected": 0.11984378099441528, |
| "logps/chosen": -41.754310607910156, |
| "logps/rejected": -67.43799591064453, |
| "loss": 0.3531, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.4328157305717468, |
| "rewards/margins": 1.053360939025879, |
| "rewards/rejected": -1.4861767292022705, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.7648456057007125, |
| "grad_norm": 51.5, |
| "learning_rate": 4.4703087885985746e-07, |
| "logits/chosen": 0.0631004199385643, |
| "logits/rejected": 0.13722942769527435, |
| "logps/chosen": -41.07665252685547, |
| "logps/rejected": -62.44814682006836, |
| "loss": 0.3926, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.39121901988983154, |
| "rewards/margins": 0.9159356355667114, |
| "rewards/rejected": -1.307154655456543, |
| "step": 4656 |
| }, |
| { |
| "epoch": 2.7743467933491686, |
| "grad_norm": 50.25, |
| "learning_rate": 4.4513064133016626e-07, |
| "logits/chosen": 0.034338682889938354, |
| "logits/rejected": 0.18351401388645172, |
| "logps/chosen": -40.274269104003906, |
| "logps/rejected": -65.64212799072266, |
| "loss": 0.3389, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": -0.34793734550476074, |
| "rewards/margins": 1.0671257972717285, |
| "rewards/rejected": -1.4150630235671997, |
| "step": 4672 |
| }, |
| { |
| "epoch": 2.7838479809976246, |
| "grad_norm": 44.25, |
| "learning_rate": 4.4323040380047505e-07, |
| "logits/chosen": 0.0658361166715622, |
| "logits/rejected": 0.22124770283699036, |
| "logps/chosen": -41.521907806396484, |
| "logps/rejected": -66.72534942626953, |
| "loss": 0.337, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -0.3596152663230896, |
| "rewards/margins": 1.0657168626785278, |
| "rewards/rejected": -1.4253321886062622, |
| "step": 4688 |
| }, |
| { |
| "epoch": 2.7933491686460807, |
| "grad_norm": 53.5, |
| "learning_rate": 4.4133016627078385e-07, |
| "logits/chosen": 0.09054507315158844, |
| "logits/rejected": 0.17531618475914001, |
| "logps/chosen": -39.683006286621094, |
| "logps/rejected": -65.05414581298828, |
| "loss": 0.3834, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.39716941118240356, |
| "rewards/margins": 0.9602290987968445, |
| "rewards/rejected": -1.357398271560669, |
| "step": 4704 |
| }, |
| { |
| "epoch": 2.802850356294537, |
| "grad_norm": 36.25, |
| "learning_rate": 4.3942992874109265e-07, |
| "logits/chosen": 0.03398045897483826, |
| "logits/rejected": 0.1653180718421936, |
| "logps/chosen": -41.86040496826172, |
| "logps/rejected": -66.90827941894531, |
| "loss": 0.3572, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.3843865394592285, |
| "rewards/margins": 1.0734546184539795, |
| "rewards/rejected": -1.457841157913208, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.812351543942993, |
| "grad_norm": 37.5, |
| "learning_rate": 4.3752969121140144e-07, |
| "logits/chosen": 0.039868682622909546, |
| "logits/rejected": 0.1837385594844818, |
| "logps/chosen": -40.565216064453125, |
| "logps/rejected": -63.868896484375, |
| "loss": 0.3527, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.378643661737442, |
| "rewards/margins": 0.9983595013618469, |
| "rewards/rejected": -1.3770031929016113, |
| "step": 4736 |
| }, |
| { |
| "epoch": 2.821852731591449, |
| "grad_norm": 55.25, |
| "learning_rate": 4.356294536817102e-07, |
| "logits/chosen": 0.09310074895620346, |
| "logits/rejected": 0.14162081480026245, |
| "logps/chosen": -41.470603942871094, |
| "logps/rejected": -65.76705169677734, |
| "loss": 0.3606, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4034523069858551, |
| "rewards/margins": 1.0022685527801514, |
| "rewards/rejected": -1.4057209491729736, |
| "step": 4752 |
| }, |
| { |
| "epoch": 2.831353919239905, |
| "grad_norm": 41.75, |
| "learning_rate": 4.33729216152019e-07, |
| "logits/chosen": 0.11650769412517548, |
| "logits/rejected": 0.15587171912193298, |
| "logps/chosen": -41.557228088378906, |
| "logps/rejected": -60.604087829589844, |
| "loss": 0.3735, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": -0.4029754400253296, |
| "rewards/margins": 0.8826481103897095, |
| "rewards/rejected": -1.2856236696243286, |
| "step": 4768 |
| }, |
| { |
| "epoch": 2.840855106888361, |
| "grad_norm": 43.75, |
| "learning_rate": 4.318289786223278e-07, |
| "logits/chosen": 0.02145340107381344, |
| "logits/rejected": 0.14118751883506775, |
| "logps/chosen": -42.241329193115234, |
| "logps/rejected": -64.31900024414062, |
| "loss": 0.3717, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4430907368659973, |
| "rewards/margins": 0.9653340578079224, |
| "rewards/rejected": -1.4084248542785645, |
| "step": 4784 |
| }, |
| { |
| "epoch": 2.850356294536817, |
| "grad_norm": 43.25, |
| "learning_rate": 4.299287410926365e-07, |
| "logits/chosen": 0.026168465614318848, |
| "logits/rejected": 0.06277532130479813, |
| "logps/chosen": -41.47759246826172, |
| "logps/rejected": -60.46055603027344, |
| "loss": 0.4074, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.43215835094451904, |
| "rewards/margins": 0.8435641527175903, |
| "rewards/rejected": -1.2757225036621094, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.859857482185273, |
| "grad_norm": 42.25, |
| "learning_rate": 4.280285035629453e-07, |
| "logits/chosen": 0.074520543217659, |
| "logits/rejected": 0.16152548789978027, |
| "logps/chosen": -41.94219970703125, |
| "logps/rejected": -64.68109893798828, |
| "loss": 0.3645, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.3893333375453949, |
| "rewards/margins": 0.9812647104263306, |
| "rewards/rejected": -1.3705980777740479, |
| "step": 4816 |
| }, |
| { |
| "epoch": 2.869358669833729, |
| "grad_norm": 39.25, |
| "learning_rate": 4.261282660332541e-07, |
| "logits/chosen": 0.02079102396965027, |
| "logits/rejected": 0.15321360528469086, |
| "logps/chosen": -39.940887451171875, |
| "logps/rejected": -64.5802230834961, |
| "loss": 0.3704, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.41797706484794617, |
| "rewards/margins": 0.9673931002616882, |
| "rewards/rejected": -1.385370135307312, |
| "step": 4832 |
| }, |
| { |
| "epoch": 2.878859857482185, |
| "grad_norm": 56.75, |
| "learning_rate": 4.242280285035629e-07, |
| "logits/chosen": 0.020523881539702415, |
| "logits/rejected": 0.18188393115997314, |
| "logps/chosen": -40.60096740722656, |
| "logps/rejected": -64.39540100097656, |
| "loss": 0.3794, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.42755821347236633, |
| "rewards/margins": 0.9751715064048767, |
| "rewards/rejected": -1.4027297496795654, |
| "step": 4848 |
| }, |
| { |
| "epoch": 2.888361045130641, |
| "grad_norm": 41.0, |
| "learning_rate": 4.223277909738717e-07, |
| "logits/chosen": 0.038833338767290115, |
| "logits/rejected": 0.15533462166786194, |
| "logps/chosen": -39.362998962402344, |
| "logps/rejected": -65.98983001708984, |
| "loss": 0.3129, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -0.29717180132865906, |
| "rewards/margins": 1.1468960046768188, |
| "rewards/rejected": -1.4440677165985107, |
| "step": 4864 |
| }, |
| { |
| "epoch": 2.8978622327790973, |
| "grad_norm": 63.75, |
| "learning_rate": 4.204275534441805e-07, |
| "logits/chosen": 0.05080319941043854, |
| "logits/rejected": 0.11638832837343216, |
| "logps/chosen": -40.73345184326172, |
| "logps/rejected": -62.49778747558594, |
| "loss": 0.387, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.3901386260986328, |
| "rewards/margins": 0.9359588623046875, |
| "rewards/rejected": -1.3260974884033203, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.9073634204275534, |
| "grad_norm": 41.0, |
| "learning_rate": 4.185273159144893e-07, |
| "logits/chosen": 0.05678907781839371, |
| "logits/rejected": 0.19164641201496124, |
| "logps/chosen": -40.55916213989258, |
| "logps/rejected": -65.99730682373047, |
| "loss": 0.3831, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.3813077211380005, |
| "rewards/margins": 0.9531018733978271, |
| "rewards/rejected": -1.3344097137451172, |
| "step": 4896 |
| }, |
| { |
| "epoch": 2.9168646080760094, |
| "grad_norm": 53.0, |
| "learning_rate": 4.166270783847981e-07, |
| "logits/chosen": -0.0027349982410669327, |
| "logits/rejected": 0.0990891233086586, |
| "logps/chosen": -39.93202209472656, |
| "logps/rejected": -62.99840545654297, |
| "loss": 0.3721, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.36999353766441345, |
| "rewards/margins": 0.9719130992889404, |
| "rewards/rejected": -1.3419066667556763, |
| "step": 4912 |
| }, |
| { |
| "epoch": 2.9263657957244655, |
| "grad_norm": 40.25, |
| "learning_rate": 4.147268408551069e-07, |
| "logits/chosen": 0.05395427346229553, |
| "logits/rejected": 0.13786588609218597, |
| "logps/chosen": -40.425132751464844, |
| "logps/rejected": -61.25279998779297, |
| "loss": 0.3538, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.34139615297317505, |
| "rewards/margins": 1.0007226467132568, |
| "rewards/rejected": -1.3421188592910767, |
| "step": 4928 |
| }, |
| { |
| "epoch": 2.9358669833729216, |
| "grad_norm": 65.0, |
| "learning_rate": 4.1282660332541564e-07, |
| "logits/chosen": 0.09111806005239487, |
| "logits/rejected": 0.13585761189460754, |
| "logps/chosen": -43.16571807861328, |
| "logps/rejected": -64.36392211914062, |
| "loss": 0.3918, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4209873378276825, |
| "rewards/margins": 0.8701137900352478, |
| "rewards/rejected": -1.2911012172698975, |
| "step": 4944 |
| }, |
| { |
| "epoch": 2.9453681710213777, |
| "grad_norm": 53.0, |
| "learning_rate": 4.1092636579572444e-07, |
| "logits/chosen": 0.05853826552629471, |
| "logits/rejected": 0.18161046504974365, |
| "logps/chosen": -40.96638488769531, |
| "logps/rejected": -66.1131591796875, |
| "loss": 0.3467, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.3545738160610199, |
| "rewards/margins": 1.0768417119979858, |
| "rewards/rejected": -1.4314155578613281, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.954869358669834, |
| "grad_norm": 41.5, |
| "learning_rate": 4.0902612826603324e-07, |
| "logits/chosen": 0.06567525863647461, |
| "logits/rejected": 0.18003502488136292, |
| "logps/chosen": -39.08511734008789, |
| "logps/rejected": -61.72608947753906, |
| "loss": 0.3658, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3397156000137329, |
| "rewards/margins": 1.0000014305114746, |
| "rewards/rejected": -1.3397170305252075, |
| "step": 4976 |
| }, |
| { |
| "epoch": 2.96437054631829, |
| "grad_norm": 44.25, |
| "learning_rate": 4.0712589073634203e-07, |
| "logits/chosen": 0.016267111524939537, |
| "logits/rejected": 0.13151559233665466, |
| "logps/chosen": -40.71261978149414, |
| "logps/rejected": -62.006256103515625, |
| "loss": 0.3973, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.4248185455799103, |
| "rewards/margins": 0.8792138695716858, |
| "rewards/rejected": -1.3040324449539185, |
| "step": 4992 |
| }, |
| { |
| "epoch": 2.973871733966746, |
| "grad_norm": 58.5, |
| "learning_rate": 4.0522565320665083e-07, |
| "logits/chosen": 0.03460274264216423, |
| "logits/rejected": 0.13233664631843567, |
| "logps/chosen": -40.716407775878906, |
| "logps/rejected": -62.79671096801758, |
| "loss": 0.3879, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.4130927622318268, |
| "rewards/margins": 0.9299353957176208, |
| "rewards/rejected": -1.3430280685424805, |
| "step": 5008 |
| }, |
| { |
| "epoch": 2.983372921615202, |
| "grad_norm": 47.0, |
| "learning_rate": 4.0332541567695963e-07, |
| "logits/chosen": -0.026960894465446472, |
| "logits/rejected": 0.11078447848558426, |
| "logps/chosen": -39.08903121948242, |
| "logps/rejected": -62.35736083984375, |
| "loss": 0.3588, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3670194745063782, |
| "rewards/margins": 1.0107150077819824, |
| "rewards/rejected": -1.3777345418930054, |
| "step": 5024 |
| }, |
| { |
| "epoch": 2.992874109263658, |
| "grad_norm": 54.0, |
| "learning_rate": 4.0142517814726837e-07, |
| "logits/chosen": 0.026894917711615562, |
| "logits/rejected": 0.1446702629327774, |
| "logps/chosen": -41.22724914550781, |
| "logps/rejected": -64.66578674316406, |
| "loss": 0.3601, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3958403468132019, |
| "rewards/margins": 0.9907092452049255, |
| "rewards/rejected": -1.386549472808838, |
| "step": 5040 |
| }, |
| { |
| "epoch": 3.002375296912114, |
| "grad_norm": 38.25, |
| "learning_rate": 3.9952494061757717e-07, |
| "logits/chosen": 0.0721140205860138, |
| "logits/rejected": 0.08904615044593811, |
| "logps/chosen": -42.36117172241211, |
| "logps/rejected": -62.753578186035156, |
| "loss": 0.3669, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.4690694510936737, |
| "rewards/margins": 0.9362343549728394, |
| "rewards/rejected": -1.4053038358688354, |
| "step": 5056 |
| }, |
| { |
| "epoch": 3.01187648456057, |
| "grad_norm": 38.75, |
| "learning_rate": 3.9762470308788596e-07, |
| "logits/chosen": 0.08058139681816101, |
| "logits/rejected": 0.14635083079338074, |
| "logps/chosen": -40.83808135986328, |
| "logps/rejected": -62.58879470825195, |
| "loss": 0.3736, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.40799736976623535, |
| "rewards/margins": 0.9339090585708618, |
| "rewards/rejected": -1.3419064283370972, |
| "step": 5072 |
| }, |
| { |
| "epoch": 3.021377672209026, |
| "grad_norm": 49.25, |
| "learning_rate": 3.9572446555819476e-07, |
| "logits/chosen": 0.07044284790754318, |
| "logits/rejected": 0.14013811945915222, |
| "logps/chosen": -40.04065704345703, |
| "logps/rejected": -60.96607971191406, |
| "loss": 0.3681, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.35846540331840515, |
| "rewards/margins": 0.9638465642929077, |
| "rewards/rejected": -1.3223118782043457, |
| "step": 5088 |
| }, |
| { |
| "epoch": 3.030878859857482, |
| "grad_norm": 52.0, |
| "learning_rate": 3.9382422802850356e-07, |
| "logits/chosen": 0.060851648449897766, |
| "logits/rejected": 0.1331576406955719, |
| "logps/chosen": -41.8791618347168, |
| "logps/rejected": -64.0675048828125, |
| "loss": 0.3568, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.359686017036438, |
| "rewards/margins": 0.9825432300567627, |
| "rewards/rejected": -1.3422291278839111, |
| "step": 5104 |
| }, |
| { |
| "epoch": 3.040380047505938, |
| "grad_norm": 40.5, |
| "learning_rate": 3.919239904988123e-07, |
| "logits/chosen": 0.009696897119283676, |
| "logits/rejected": 0.12189489603042603, |
| "logps/chosen": -39.73085021972656, |
| "logps/rejected": -63.392818450927734, |
| "loss": 0.3708, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.3765300512313843, |
| "rewards/margins": 0.9506427049636841, |
| "rewards/rejected": -1.3271726369857788, |
| "step": 5120 |
| }, |
| { |
| "epoch": 3.0498812351543942, |
| "grad_norm": 48.75, |
| "learning_rate": 3.900237529691211e-07, |
| "logits/chosen": 0.10276854038238525, |
| "logits/rejected": 0.13192720711231232, |
| "logps/chosen": -42.13685989379883, |
| "logps/rejected": -63.64410400390625, |
| "loss": 0.3788, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.39574676752090454, |
| "rewards/margins": 0.9484460949897766, |
| "rewards/rejected": -1.3441928625106812, |
| "step": 5136 |
| }, |
| { |
| "epoch": 3.0593824228028503, |
| "grad_norm": 52.25, |
| "learning_rate": 3.881235154394299e-07, |
| "logits/chosen": 0.11582867801189423, |
| "logits/rejected": 0.11568093299865723, |
| "logps/chosen": -43.7314338684082, |
| "logps/rejected": -63.54302215576172, |
| "loss": 0.3977, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.4822978973388672, |
| "rewards/margins": 0.8951501846313477, |
| "rewards/rejected": -1.3774480819702148, |
| "step": 5152 |
| }, |
| { |
| "epoch": 3.0688836104513064, |
| "grad_norm": 49.25, |
| "learning_rate": 3.862232779097387e-07, |
| "logits/chosen": 0.08060070872306824, |
| "logits/rejected": 0.10565990209579468, |
| "logps/chosen": -41.34340286254883, |
| "logps/rejected": -63.646820068359375, |
| "loss": 0.3848, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3923201858997345, |
| "rewards/margins": 0.9066511392593384, |
| "rewards/rejected": -1.29897141456604, |
| "step": 5168 |
| }, |
| { |
| "epoch": 3.0783847980997625, |
| "grad_norm": 46.25, |
| "learning_rate": 3.843230403800475e-07, |
| "logits/chosen": 0.055783893913030624, |
| "logits/rejected": 0.14008364081382751, |
| "logps/chosen": -41.782936096191406, |
| "logps/rejected": -62.20945739746094, |
| "loss": 0.3836, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.39728862047195435, |
| "rewards/margins": 0.8838520050048828, |
| "rewards/rejected": -1.2811405658721924, |
| "step": 5184 |
| }, |
| { |
| "epoch": 3.0878859857482186, |
| "grad_norm": 43.5, |
| "learning_rate": 3.824228028503563e-07, |
| "logits/chosen": 0.025185655802488327, |
| "logits/rejected": 0.12884651124477386, |
| "logps/chosen": -41.46108627319336, |
| "logps/rejected": -65.40599822998047, |
| "loss": 0.3744, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.3796524405479431, |
| "rewards/margins": 0.9922239780426025, |
| "rewards/rejected": -1.3718763589859009, |
| "step": 5200 |
| }, |
| { |
| "epoch": 3.0973871733966747, |
| "grad_norm": 57.25, |
| "learning_rate": 3.805225653206651e-07, |
| "logits/chosen": 0.036128733307123184, |
| "logits/rejected": 0.14582857489585876, |
| "logps/chosen": -40.435264587402344, |
| "logps/rejected": -62.72749328613281, |
| "loss": 0.3616, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.34147587418556213, |
| "rewards/margins": 1.0060913562774658, |
| "rewards/rejected": -1.3475672006607056, |
| "step": 5216 |
| }, |
| { |
| "epoch": 3.1068883610451308, |
| "grad_norm": 54.25, |
| "learning_rate": 3.786223277909739e-07, |
| "logits/chosen": 0.08208269625902176, |
| "logits/rejected": 0.1514563262462616, |
| "logps/chosen": -43.135169982910156, |
| "logps/rejected": -62.251564025878906, |
| "loss": 0.4333, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -0.4657037854194641, |
| "rewards/margins": 0.7968182563781738, |
| "rewards/rejected": -1.2625218629837036, |
| "step": 5232 |
| }, |
| { |
| "epoch": 3.116389548693587, |
| "grad_norm": 47.75, |
| "learning_rate": 3.767220902612827e-07, |
| "logits/chosen": 0.0023325812071561813, |
| "logits/rejected": 0.07401884347200394, |
| "logps/chosen": -42.72369384765625, |
| "logps/rejected": -65.19200134277344, |
| "loss": 0.3696, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4340641498565674, |
| "rewards/margins": 0.9625180959701538, |
| "rewards/rejected": -1.3965823650360107, |
| "step": 5248 |
| }, |
| { |
| "epoch": 3.125890736342043, |
| "grad_norm": 55.5, |
| "learning_rate": 3.748218527315915e-07, |
| "logits/chosen": 0.04036543890833855, |
| "logits/rejected": 0.1542033553123474, |
| "logps/chosen": -40.91764831542969, |
| "logps/rejected": -67.87660217285156, |
| "loss": 0.3403, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.42045873403549194, |
| "rewards/margins": 1.0873808860778809, |
| "rewards/rejected": -1.5078396797180176, |
| "step": 5264 |
| }, |
| { |
| "epoch": 3.1353919239904986, |
| "grad_norm": 55.0, |
| "learning_rate": 3.729216152019002e-07, |
| "logits/chosen": 0.0443246066570282, |
| "logits/rejected": 0.21922534704208374, |
| "logps/chosen": -42.55435562133789, |
| "logps/rejected": -65.33242797851562, |
| "loss": 0.4199, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.4705503284931183, |
| "rewards/margins": 0.8472099304199219, |
| "rewards/rejected": -1.3177603483200073, |
| "step": 5280 |
| }, |
| { |
| "epoch": 3.1448931116389547, |
| "grad_norm": 53.25, |
| "learning_rate": 3.71021377672209e-07, |
| "logits/chosen": 0.009390661492943764, |
| "logits/rejected": 0.12596291303634644, |
| "logps/chosen": -40.291473388671875, |
| "logps/rejected": -62.212562561035156, |
| "loss": 0.3862, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.4117724299430847, |
| "rewards/margins": 0.9006836414337158, |
| "rewards/rejected": -1.3124560117721558, |
| "step": 5296 |
| }, |
| { |
| "epoch": 3.1543942992874108, |
| "grad_norm": 50.0, |
| "learning_rate": 3.6912114014251776e-07, |
| "logits/chosen": -0.022532382979989052, |
| "logits/rejected": 0.11396850645542145, |
| "logps/chosen": -40.82377624511719, |
| "logps/rejected": -63.843467712402344, |
| "loss": 0.3762, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.40509068965911865, |
| "rewards/margins": 0.9473409652709961, |
| "rewards/rejected": -1.3524316549301147, |
| "step": 5312 |
| }, |
| { |
| "epoch": 3.163895486935867, |
| "grad_norm": 51.25, |
| "learning_rate": 3.6722090261282656e-07, |
| "logits/chosen": -0.019587915390729904, |
| "logits/rejected": 0.09787953644990921, |
| "logps/chosen": -38.83479690551758, |
| "logps/rejected": -62.37450408935547, |
| "loss": 0.3457, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": -0.31038060784339905, |
| "rewards/margins": 1.0302282571792603, |
| "rewards/rejected": -1.340608835220337, |
| "step": 5328 |
| }, |
| { |
| "epoch": 3.173396674584323, |
| "grad_norm": 45.0, |
| "learning_rate": 3.6532066508313535e-07, |
| "logits/chosen": 0.0068716611713171005, |
| "logits/rejected": 0.10715761035680771, |
| "logps/chosen": -39.36448287963867, |
| "logps/rejected": -61.61016845703125, |
| "loss": 0.3694, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.35794466733932495, |
| "rewards/margins": 0.9621202349662781, |
| "rewards/rejected": -1.3200650215148926, |
| "step": 5344 |
| }, |
| { |
| "epoch": 3.182897862232779, |
| "grad_norm": 57.5, |
| "learning_rate": 3.6342042755344415e-07, |
| "logits/chosen": 0.08224662393331528, |
| "logits/rejected": 0.10815519094467163, |
| "logps/chosen": -43.51884078979492, |
| "logps/rejected": -65.097900390625, |
| "loss": 0.3834, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.4219568073749542, |
| "rewards/margins": 0.934171736240387, |
| "rewards/rejected": -1.3561286926269531, |
| "step": 5360 |
| }, |
| { |
| "epoch": 3.192399049881235, |
| "grad_norm": 47.5, |
| "learning_rate": 3.6152019002375295e-07, |
| "logits/chosen": 0.011415719985961914, |
| "logits/rejected": 0.10578812658786774, |
| "logps/chosen": -41.20673370361328, |
| "logps/rejected": -63.52978515625, |
| "loss": 0.365, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.41507184505462646, |
| "rewards/margins": 0.9821330308914185, |
| "rewards/rejected": -1.397204875946045, |
| "step": 5376 |
| }, |
| { |
| "epoch": 3.201900237529691, |
| "grad_norm": 42.5, |
| "learning_rate": 3.5961995249406174e-07, |
| "logits/chosen": 0.03810262307524681, |
| "logits/rejected": 0.12873801589012146, |
| "logps/chosen": -41.731388092041016, |
| "logps/rejected": -65.47847747802734, |
| "loss": 0.3798, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.44025665521621704, |
| "rewards/margins": 0.9518187046051025, |
| "rewards/rejected": -1.3920754194259644, |
| "step": 5392 |
| }, |
| { |
| "epoch": 3.2114014251781473, |
| "grad_norm": 49.25, |
| "learning_rate": 3.5771971496437054e-07, |
| "logits/chosen": -0.023601891472935677, |
| "logits/rejected": 0.06935597211122513, |
| "logps/chosen": -38.68426513671875, |
| "logps/rejected": -61.103973388671875, |
| "loss": 0.3634, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.37336137890815735, |
| "rewards/margins": 1.0120704174041748, |
| "rewards/rejected": -1.3854318857192993, |
| "step": 5408 |
| }, |
| { |
| "epoch": 3.2209026128266034, |
| "grad_norm": 52.5, |
| "learning_rate": 3.5581947743467934e-07, |
| "logits/chosen": 0.04542490094900131, |
| "logits/rejected": 0.09217678010463715, |
| "logps/chosen": -43.481048583984375, |
| "logps/rejected": -63.5504264831543, |
| "loss": 0.415, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.49973058700561523, |
| "rewards/margins": 0.8297138214111328, |
| "rewards/rejected": -1.3294442892074585, |
| "step": 5424 |
| }, |
| { |
| "epoch": 3.2304038004750595, |
| "grad_norm": 47.0, |
| "learning_rate": 3.5391923990498813e-07, |
| "logits/chosen": 0.10594628006219864, |
| "logits/rejected": 0.17065931856632233, |
| "logps/chosen": -40.92080307006836, |
| "logps/rejected": -64.85243225097656, |
| "loss": 0.3541, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3807818293571472, |
| "rewards/margins": 1.0301458835601807, |
| "rewards/rejected": -1.4109277725219727, |
| "step": 5440 |
| }, |
| { |
| "epoch": 3.2399049881235156, |
| "grad_norm": 66.0, |
| "learning_rate": 3.5201900237529693e-07, |
| "logits/chosen": 0.04224825277924538, |
| "logits/rejected": 0.11179321259260178, |
| "logps/chosen": -42.496681213378906, |
| "logps/rejected": -64.15071105957031, |
| "loss": 0.3789, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.41750308871269226, |
| "rewards/margins": 0.9374274015426636, |
| "rewards/rejected": -1.3549304008483887, |
| "step": 5456 |
| }, |
| { |
| "epoch": 3.2494061757719717, |
| "grad_norm": 51.75, |
| "learning_rate": 3.5011876484560573e-07, |
| "logits/chosen": 0.06084592267870903, |
| "logits/rejected": 0.13545851409435272, |
| "logps/chosen": -38.884830474853516, |
| "logps/rejected": -61.21929931640625, |
| "loss": 0.3627, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3169533908367157, |
| "rewards/margins": 0.991154670715332, |
| "rewards/rejected": -1.3081080913543701, |
| "step": 5472 |
| }, |
| { |
| "epoch": 3.2589073634204277, |
| "grad_norm": 52.5, |
| "learning_rate": 3.482185273159145e-07, |
| "logits/chosen": 0.03806290030479431, |
| "logits/rejected": 0.11884848773479462, |
| "logps/chosen": -41.503875732421875, |
| "logps/rejected": -63.86762237548828, |
| "loss": 0.3766, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.40684768557548523, |
| "rewards/margins": 0.953045129776001, |
| "rewards/rejected": -1.3598929643630981, |
| "step": 5488 |
| }, |
| { |
| "epoch": 3.268408551068884, |
| "grad_norm": 57.0, |
| "learning_rate": 3.463182897862232e-07, |
| "logits/chosen": -0.0012608803808689117, |
| "logits/rejected": 0.10703101754188538, |
| "logps/chosen": -41.227787017822266, |
| "logps/rejected": -64.53129577636719, |
| "loss": 0.3804, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.4662329852581024, |
| "rewards/margins": 0.9123408794403076, |
| "rewards/rejected": -1.3785738945007324, |
| "step": 5504 |
| }, |
| { |
| "epoch": 3.2779097387173395, |
| "grad_norm": 35.75, |
| "learning_rate": 3.44418052256532e-07, |
| "logits/chosen": 0.09872997552156448, |
| "logits/rejected": 0.16530072689056396, |
| "logps/chosen": -42.996360778808594, |
| "logps/rejected": -64.78094482421875, |
| "loss": 0.3656, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.43684783577919006, |
| "rewards/margins": 0.9832981824874878, |
| "rewards/rejected": -1.420146107673645, |
| "step": 5520 |
| }, |
| { |
| "epoch": 3.2874109263657956, |
| "grad_norm": 50.25, |
| "learning_rate": 3.425178147268408e-07, |
| "logits/chosen": 0.03449930623173714, |
| "logits/rejected": 0.17792941629886627, |
| "logps/chosen": -40.4708366394043, |
| "logps/rejected": -65.352294921875, |
| "loss": 0.3781, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.43737536668777466, |
| "rewards/margins": 0.9423834681510925, |
| "rewards/rejected": -1.3797588348388672, |
| "step": 5536 |
| }, |
| { |
| "epoch": 3.2969121140142517, |
| "grad_norm": 47.0, |
| "learning_rate": 3.406175771971496e-07, |
| "logits/chosen": 0.09654662013053894, |
| "logits/rejected": 0.17820453643798828, |
| "logps/chosen": -41.23036193847656, |
| "logps/rejected": -63.637550354003906, |
| "loss": 0.3973, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.36412250995635986, |
| "rewards/margins": 0.9050154685974121, |
| "rewards/rejected": -1.269137978553772, |
| "step": 5552 |
| }, |
| { |
| "epoch": 3.3064133016627077, |
| "grad_norm": 62.5, |
| "learning_rate": 3.387173396674584e-07, |
| "logits/chosen": 0.03694698214530945, |
| "logits/rejected": 0.13538768887519836, |
| "logps/chosen": -42.770050048828125, |
| "logps/rejected": -63.76899719238281, |
| "loss": 0.4137, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.49573254585266113, |
| "rewards/margins": 0.8381365537643433, |
| "rewards/rejected": -1.3338689804077148, |
| "step": 5568 |
| }, |
| { |
| "epoch": 3.315914489311164, |
| "grad_norm": 45.0, |
| "learning_rate": 3.368171021377672e-07, |
| "logits/chosen": 0.02915555238723755, |
| "logits/rejected": 0.1136770024895668, |
| "logps/chosen": -42.1306266784668, |
| "logps/rejected": -64.29971313476562, |
| "loss": 0.3729, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.4326779246330261, |
| "rewards/margins": 0.928207516670227, |
| "rewards/rejected": -1.360885500907898, |
| "step": 5584 |
| }, |
| { |
| "epoch": 3.32541567695962, |
| "grad_norm": 45.75, |
| "learning_rate": 3.34916864608076e-07, |
| "logits/chosen": 0.07809650897979736, |
| "logits/rejected": 0.12652210891246796, |
| "logps/chosen": -41.568660736083984, |
| "logps/rejected": -63.57560348510742, |
| "loss": 0.3613, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3894152045249939, |
| "rewards/margins": 1.0010182857513428, |
| "rewards/rejected": -1.3904335498809814, |
| "step": 5600 |
| }, |
| { |
| "epoch": 3.334916864608076, |
| "grad_norm": 58.5, |
| "learning_rate": 3.330166270783848e-07, |
| "logits/chosen": 0.040801357477903366, |
| "logits/rejected": 0.1780150681734085, |
| "logps/chosen": -41.86552810668945, |
| "logps/rejected": -65.90018463134766, |
| "loss": 0.3516, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.38096481561660767, |
| "rewards/margins": 1.0388493537902832, |
| "rewards/rejected": -1.4198143482208252, |
| "step": 5616 |
| }, |
| { |
| "epoch": 3.344418052256532, |
| "grad_norm": 53.5, |
| "learning_rate": 3.311163895486936e-07, |
| "logits/chosen": 0.01211823895573616, |
| "logits/rejected": 0.09781680256128311, |
| "logps/chosen": -41.53929138183594, |
| "logps/rejected": -63.851688385009766, |
| "loss": 0.3701, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4125053286552429, |
| "rewards/margins": 0.9792938828468323, |
| "rewards/rejected": -1.3917990922927856, |
| "step": 5632 |
| }, |
| { |
| "epoch": 3.353919239904988, |
| "grad_norm": 50.75, |
| "learning_rate": 3.292161520190024e-07, |
| "logits/chosen": 0.09114633500576019, |
| "logits/rejected": 0.15826475620269775, |
| "logps/chosen": -41.09789276123047, |
| "logps/rejected": -65.02250671386719, |
| "loss": 0.3713, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.41848012804985046, |
| "rewards/margins": 0.9512667059898376, |
| "rewards/rejected": -1.3697468042373657, |
| "step": 5648 |
| }, |
| { |
| "epoch": 3.3634204275534443, |
| "grad_norm": 51.25, |
| "learning_rate": 3.273159144893112e-07, |
| "logits/chosen": 0.021810825914144516, |
| "logits/rejected": 0.11112373322248459, |
| "logps/chosen": -43.460018157958984, |
| "logps/rejected": -64.75310516357422, |
| "loss": 0.3962, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.4909437894821167, |
| "rewards/margins": 0.8337816596031189, |
| "rewards/rejected": -1.3247253894805908, |
| "step": 5664 |
| }, |
| { |
| "epoch": 3.3729216152019004, |
| "grad_norm": 71.0, |
| "learning_rate": 3.2541567695961993e-07, |
| "logits/chosen": 0.06712229549884796, |
| "logits/rejected": 0.15457472205162048, |
| "logps/chosen": -41.23008346557617, |
| "logps/rejected": -63.24055480957031, |
| "loss": 0.3614, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.3526281416416168, |
| "rewards/margins": 0.9992504119873047, |
| "rewards/rejected": -1.3518785238265991, |
| "step": 5680 |
| }, |
| { |
| "epoch": 3.3824228028503565, |
| "grad_norm": 37.5, |
| "learning_rate": 3.235154394299287e-07, |
| "logits/chosen": 0.008397895842790604, |
| "logits/rejected": 0.11099565029144287, |
| "logps/chosen": -42.79941940307617, |
| "logps/rejected": -62.79872131347656, |
| "loss": 0.3956, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.432941198348999, |
| "rewards/margins": 0.8876244425773621, |
| "rewards/rejected": -1.3205657005310059, |
| "step": 5696 |
| }, |
| { |
| "epoch": 3.391923990498812, |
| "grad_norm": 46.75, |
| "learning_rate": 3.216152019002375e-07, |
| "logits/chosen": 0.03148173540830612, |
| "logits/rejected": 0.09412042796611786, |
| "logps/chosen": -40.27482223510742, |
| "logps/rejected": -64.9065170288086, |
| "loss": 0.3578, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.36367034912109375, |
| "rewards/margins": 1.0289617776870728, |
| "rewards/rejected": -1.392632246017456, |
| "step": 5712 |
| }, |
| { |
| "epoch": 3.401425178147268, |
| "grad_norm": 61.25, |
| "learning_rate": 3.1971496437054627e-07, |
| "logits/chosen": 0.03281719982624054, |
| "logits/rejected": 0.10020038485527039, |
| "logps/chosen": -41.20103073120117, |
| "logps/rejected": -62.454811096191406, |
| "loss": 0.3892, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.4000158905982971, |
| "rewards/margins": 0.9230848550796509, |
| "rewards/rejected": -1.3231008052825928, |
| "step": 5728 |
| }, |
| { |
| "epoch": 3.4109263657957243, |
| "grad_norm": 42.5, |
| "learning_rate": 3.1781472684085506e-07, |
| "logits/chosen": 0.03375418484210968, |
| "logits/rejected": 0.07718026638031006, |
| "logps/chosen": -42.45466613769531, |
| "logps/rejected": -62.32984924316406, |
| "loss": 0.4054, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.4012918472290039, |
| "rewards/margins": 0.8690704107284546, |
| "rewards/rejected": -1.2703622579574585, |
| "step": 5744 |
| }, |
| { |
| "epoch": 3.4204275534441804, |
| "grad_norm": 50.25, |
| "learning_rate": 3.1591448931116386e-07, |
| "logits/chosen": 0.04832587391138077, |
| "logits/rejected": 0.14456292986869812, |
| "logps/chosen": -38.7043571472168, |
| "logps/rejected": -61.933982849121094, |
| "loss": 0.368, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.33057889342308044, |
| "rewards/margins": 0.9730508923530579, |
| "rewards/rejected": -1.3036296367645264, |
| "step": 5760 |
| }, |
| { |
| "epoch": 3.4299287410926365, |
| "grad_norm": 41.5, |
| "learning_rate": 3.1401425178147266e-07, |
| "logits/chosen": 0.07459306716918945, |
| "logits/rejected": 0.08237803727388382, |
| "logps/chosen": -41.34931945800781, |
| "logps/rejected": -62.95779037475586, |
| "loss": 0.3687, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.37355130910873413, |
| "rewards/margins": 1.0145928859710693, |
| "rewards/rejected": -1.3881440162658691, |
| "step": 5776 |
| }, |
| { |
| "epoch": 3.4394299287410925, |
| "grad_norm": 57.5, |
| "learning_rate": 3.1211401425178145e-07, |
| "logits/chosen": 0.03901844099164009, |
| "logits/rejected": 0.11285356432199478, |
| "logps/chosen": -40.3481559753418, |
| "logps/rejected": -63.19662094116211, |
| "loss": 0.3758, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.3881966769695282, |
| "rewards/margins": 0.9612338542938232, |
| "rewards/rejected": -1.3494304418563843, |
| "step": 5792 |
| }, |
| { |
| "epoch": 3.4489311163895486, |
| "grad_norm": 35.75, |
| "learning_rate": 3.1021377672209025e-07, |
| "logits/chosen": -0.005038658622652292, |
| "logits/rejected": 0.06839510053396225, |
| "logps/chosen": -39.993309020996094, |
| "logps/rejected": -60.90965270996094, |
| "loss": 0.3949, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.37709131836891174, |
| "rewards/margins": 0.8873628973960876, |
| "rewards/rejected": -1.2644541263580322, |
| "step": 5808 |
| }, |
| { |
| "epoch": 3.4584323040380047, |
| "grad_norm": 56.0, |
| "learning_rate": 3.0831353919239905e-07, |
| "logits/chosen": 0.09682201594114304, |
| "logits/rejected": 0.11965522170066833, |
| "logps/chosen": -41.68549346923828, |
| "logps/rejected": -64.51807403564453, |
| "loss": 0.3576, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.36759233474731445, |
| "rewards/margins": 1.0216158628463745, |
| "rewards/rejected": -1.389208197593689, |
| "step": 5824 |
| }, |
| { |
| "epoch": 3.467933491686461, |
| "grad_norm": 47.25, |
| "learning_rate": 3.0641330166270784e-07, |
| "logits/chosen": 0.04335436224937439, |
| "logits/rejected": 0.09136922657489777, |
| "logps/chosen": -41.07777404785156, |
| "logps/rejected": -62.33134460449219, |
| "loss": 0.3905, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.41379624605178833, |
| "rewards/margins": 0.8802096843719482, |
| "rewards/rejected": -1.2940058708190918, |
| "step": 5840 |
| }, |
| { |
| "epoch": 3.477434679334917, |
| "grad_norm": 46.75, |
| "learning_rate": 3.0451306413301664e-07, |
| "logits/chosen": 0.07700560986995697, |
| "logits/rejected": 0.11378947645425797, |
| "logps/chosen": -41.164493560791016, |
| "logps/rejected": -62.910099029541016, |
| "loss": 0.3575, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -0.35974451899528503, |
| "rewards/margins": 0.9829455614089966, |
| "rewards/rejected": -1.3426902294158936, |
| "step": 5856 |
| }, |
| { |
| "epoch": 3.486935866983373, |
| "grad_norm": 37.5, |
| "learning_rate": 3.026128266033254e-07, |
| "logits/chosen": -0.009967565536499023, |
| "logits/rejected": 0.13184106349945068, |
| "logps/chosen": -41.89753723144531, |
| "logps/rejected": -65.06842041015625, |
| "loss": 0.373, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.43801864981651306, |
| "rewards/margins": 0.9392563104629517, |
| "rewards/rejected": -1.377274990081787, |
| "step": 5872 |
| }, |
| { |
| "epoch": 3.496437054631829, |
| "grad_norm": 47.25, |
| "learning_rate": 3.007125890736342e-07, |
| "logits/chosen": 0.0052648792043328285, |
| "logits/rejected": 0.1504189670085907, |
| "logps/chosen": -40.17364501953125, |
| "logps/rejected": -62.997554779052734, |
| "loss": 0.3612, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3528364896774292, |
| "rewards/margins": 0.9979708194732666, |
| "rewards/rejected": -1.3508071899414062, |
| "step": 5888 |
| }, |
| { |
| "epoch": 3.505938242280285, |
| "grad_norm": 54.75, |
| "learning_rate": 2.98812351543943e-07, |
| "logits/chosen": 0.017038684338331223, |
| "logits/rejected": 0.06112390756607056, |
| "logps/chosen": -41.39101791381836, |
| "logps/rejected": -64.11455535888672, |
| "loss": 0.386, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4848855137825012, |
| "rewards/margins": 0.9375608563423157, |
| "rewards/rejected": -1.4224462509155273, |
| "step": 5904 |
| }, |
| { |
| "epoch": 3.5154394299287413, |
| "grad_norm": 49.75, |
| "learning_rate": 2.969121140142518e-07, |
| "logits/chosen": 0.020917030051350594, |
| "logits/rejected": 0.09810462594032288, |
| "logps/chosen": -39.80662155151367, |
| "logps/rejected": -60.70354080200195, |
| "loss": 0.3528, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3079432249069214, |
| "rewards/margins": 1.035301685333252, |
| "rewards/rejected": -1.3432449102401733, |
| "step": 5920 |
| }, |
| { |
| "epoch": 3.5249406175771973, |
| "grad_norm": 48.25, |
| "learning_rate": 2.9501187648456057e-07, |
| "logits/chosen": 0.07180722057819366, |
| "logits/rejected": 0.20187009871006012, |
| "logps/chosen": -42.24882507324219, |
| "logps/rejected": -66.23828887939453, |
| "loss": 0.3684, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.36295828223228455, |
| "rewards/margins": 0.9907328486442566, |
| "rewards/rejected": -1.3536912202835083, |
| "step": 5936 |
| }, |
| { |
| "epoch": 3.5344418052256534, |
| "grad_norm": 55.75, |
| "learning_rate": 2.9311163895486937e-07, |
| "logits/chosen": 0.04826946556568146, |
| "logits/rejected": 0.18203888833522797, |
| "logps/chosen": -44.12788772583008, |
| "logps/rejected": -65.50685119628906, |
| "loss": 0.4068, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.45171403884887695, |
| "rewards/margins": 0.8484476208686829, |
| "rewards/rejected": -1.300161600112915, |
| "step": 5952 |
| }, |
| { |
| "epoch": 3.5439429928741095, |
| "grad_norm": 67.5, |
| "learning_rate": 2.912114014251781e-07, |
| "logits/chosen": 0.03645118325948715, |
| "logits/rejected": 0.10001954436302185, |
| "logps/chosen": -41.21052551269531, |
| "logps/rejected": -59.9468994140625, |
| "loss": 0.4013, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.432812362909317, |
| "rewards/margins": 0.8651708364486694, |
| "rewards/rejected": -1.297983169555664, |
| "step": 5968 |
| }, |
| { |
| "epoch": 3.553444180522565, |
| "grad_norm": 59.0, |
| "learning_rate": 2.893111638954869e-07, |
| "logits/chosen": 0.014936832711100578, |
| "logits/rejected": 0.13327737152576447, |
| "logps/chosen": -39.648658752441406, |
| "logps/rejected": -64.09784698486328, |
| "loss": 0.3456, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.34701600670814514, |
| "rewards/margins": 1.0596061944961548, |
| "rewards/rejected": -1.4066221714019775, |
| "step": 5984 |
| }, |
| { |
| "epoch": 3.5629453681710213, |
| "grad_norm": 67.0, |
| "learning_rate": 2.874109263657957e-07, |
| "logits/chosen": -0.038970720022916794, |
| "logits/rejected": 0.1254507154226303, |
| "logps/chosen": -39.5800666809082, |
| "logps/rejected": -63.40620422363281, |
| "loss": 0.3871, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3893817365169525, |
| "rewards/margins": 0.9221255779266357, |
| "rewards/rejected": -1.3115073442459106, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.5724465558194773, |
| "grad_norm": 46.25, |
| "learning_rate": 2.855106888361045e-07, |
| "logits/chosen": 0.027300620451569557, |
| "logits/rejected": 0.14734551310539246, |
| "logps/chosen": -39.858192443847656, |
| "logps/rejected": -64.0389633178711, |
| "loss": 0.365, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.40028518438339233, |
| "rewards/margins": 0.9700754284858704, |
| "rewards/rejected": -1.3703604936599731, |
| "step": 6016 |
| }, |
| { |
| "epoch": 3.5819477434679334, |
| "grad_norm": 50.0, |
| "learning_rate": 2.836104513064133e-07, |
| "logits/chosen": 0.056467145681381226, |
| "logits/rejected": 0.12261079996824265, |
| "logps/chosen": -40.67634963989258, |
| "logps/rejected": -62.402809143066406, |
| "loss": 0.3536, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.3845675587654114, |
| "rewards/margins": 1.0051778554916382, |
| "rewards/rejected": -1.3897454738616943, |
| "step": 6032 |
| }, |
| { |
| "epoch": 3.5914489311163895, |
| "grad_norm": 55.25, |
| "learning_rate": 2.8171021377672204e-07, |
| "logits/chosen": 0.10420281440019608, |
| "logits/rejected": 0.13047069311141968, |
| "logps/chosen": -40.67375946044922, |
| "logps/rejected": -61.41259765625, |
| "loss": 0.3815, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.3753702640533447, |
| "rewards/margins": 0.9167128205299377, |
| "rewards/rejected": -1.2920830249786377, |
| "step": 6048 |
| }, |
| { |
| "epoch": 3.6009501187648456, |
| "grad_norm": 68.0, |
| "learning_rate": 2.7980997624703084e-07, |
| "logits/chosen": 0.05103810504078865, |
| "logits/rejected": 0.10168743878602982, |
| "logps/chosen": -41.25544738769531, |
| "logps/rejected": -60.72285461425781, |
| "loss": 0.3892, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.3910224437713623, |
| "rewards/margins": 0.9060119390487671, |
| "rewards/rejected": -1.2970343828201294, |
| "step": 6064 |
| }, |
| { |
| "epoch": 3.6104513064133017, |
| "grad_norm": 54.25, |
| "learning_rate": 2.7790973871733964e-07, |
| "logits/chosen": 0.013073207810521126, |
| "logits/rejected": 0.12633521854877472, |
| "logps/chosen": -41.78628921508789, |
| "logps/rejected": -66.28352355957031, |
| "loss": 0.353, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.40143895149230957, |
| "rewards/margins": 1.0364677906036377, |
| "rewards/rejected": -1.4379067420959473, |
| "step": 6080 |
| }, |
| { |
| "epoch": 3.619952494061758, |
| "grad_norm": 50.75, |
| "learning_rate": 2.7600950118764843e-07, |
| "logits/chosen": 0.08037324994802475, |
| "logits/rejected": 0.17064939439296722, |
| "logps/chosen": -42.0279655456543, |
| "logps/rejected": -64.91426086425781, |
| "loss": 0.3877, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.43144869804382324, |
| "rewards/margins": 0.8975583910942078, |
| "rewards/rejected": -1.3290070295333862, |
| "step": 6096 |
| }, |
| { |
| "epoch": 3.629453681710214, |
| "grad_norm": 56.5, |
| "learning_rate": 2.7410926365795723e-07, |
| "logits/chosen": 0.055456578731536865, |
| "logits/rejected": 0.22028645873069763, |
| "logps/chosen": -39.780517578125, |
| "logps/rejected": -63.519813537597656, |
| "loss": 0.3566, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3360990881919861, |
| "rewards/margins": 0.9839707612991333, |
| "rewards/rejected": -1.3200697898864746, |
| "step": 6112 |
| }, |
| { |
| "epoch": 3.63895486935867, |
| "grad_norm": 39.5, |
| "learning_rate": 2.7220902612826603e-07, |
| "logits/chosen": 0.060147836804389954, |
| "logits/rejected": 0.1302787959575653, |
| "logps/chosen": -41.454872131347656, |
| "logps/rejected": -64.60635375976562, |
| "loss": 0.3871, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.4015429615974426, |
| "rewards/margins": 0.9040000438690186, |
| "rewards/rejected": -1.3055431842803955, |
| "step": 6128 |
| }, |
| { |
| "epoch": 3.648456057007126, |
| "grad_norm": 47.25, |
| "learning_rate": 2.703087885985748e-07, |
| "logits/chosen": -0.013463707640767097, |
| "logits/rejected": 0.07376192510128021, |
| "logps/chosen": -39.62220764160156, |
| "logps/rejected": -62.20975875854492, |
| "loss": 0.363, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.35915249586105347, |
| "rewards/margins": 0.9867640733718872, |
| "rewards/rejected": -1.3459166288375854, |
| "step": 6144 |
| }, |
| { |
| "epoch": 3.6579572446555817, |
| "grad_norm": 49.75, |
| "learning_rate": 2.684085510688836e-07, |
| "logits/chosen": 0.010295089334249496, |
| "logits/rejected": 0.17823557555675507, |
| "logps/chosen": -41.6696662902832, |
| "logps/rejected": -68.72691345214844, |
| "loss": 0.3555, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.4070267975330353, |
| "rewards/margins": 1.028420329093933, |
| "rewards/rejected": -1.4354472160339355, |
| "step": 6160 |
| }, |
| { |
| "epoch": 3.667458432304038, |
| "grad_norm": 37.0, |
| "learning_rate": 2.665083135391924e-07, |
| "logits/chosen": 0.10623430460691452, |
| "logits/rejected": 0.16167762875556946, |
| "logps/chosen": -42.180198669433594, |
| "logps/rejected": -66.37389373779297, |
| "loss": 0.3498, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.4009030759334564, |
| "rewards/margins": 1.0136104822158813, |
| "rewards/rejected": -1.4145135879516602, |
| "step": 6176 |
| }, |
| { |
| "epoch": 3.676959619952494, |
| "grad_norm": 51.5, |
| "learning_rate": 2.646080760095012e-07, |
| "logits/chosen": 0.048040006309747696, |
| "logits/rejected": 0.14534232020378113, |
| "logps/chosen": -43.282859802246094, |
| "logps/rejected": -66.5873794555664, |
| "loss": 0.3543, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.4304080605506897, |
| "rewards/margins": 1.0149825811386108, |
| "rewards/rejected": -1.4453905820846558, |
| "step": 6192 |
| }, |
| { |
| "epoch": 3.68646080760095, |
| "grad_norm": 47.25, |
| "learning_rate": 2.6270783847980996e-07, |
| "logits/chosen": 0.08240100741386414, |
| "logits/rejected": 0.16390424966812134, |
| "logps/chosen": -40.04043960571289, |
| "logps/rejected": -62.7987060546875, |
| "loss": 0.3618, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.33928748965263367, |
| "rewards/margins": 0.9862152934074402, |
| "rewards/rejected": -1.325502634048462, |
| "step": 6208 |
| }, |
| { |
| "epoch": 3.695961995249406, |
| "grad_norm": 80.5, |
| "learning_rate": 2.6080760095011876e-07, |
| "logits/chosen": 0.021123535931110382, |
| "logits/rejected": 0.1238473653793335, |
| "logps/chosen": -40.08449935913086, |
| "logps/rejected": -64.84993743896484, |
| "loss": 0.3652, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.3657268285751343, |
| "rewards/margins": 1.0157949924468994, |
| "rewards/rejected": -1.3815219402313232, |
| "step": 6224 |
| }, |
| { |
| "epoch": 3.705463182897862, |
| "grad_norm": 56.25, |
| "learning_rate": 2.589073634204275e-07, |
| "logits/chosen": 0.09684562683105469, |
| "logits/rejected": 0.18495050072669983, |
| "logps/chosen": -41.702301025390625, |
| "logps/rejected": -63.34584426879883, |
| "loss": 0.3913, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.4372140169143677, |
| "rewards/margins": 0.9003235101699829, |
| "rewards/rejected": -1.3375376462936401, |
| "step": 6240 |
| }, |
| { |
| "epoch": 3.7149643705463182, |
| "grad_norm": 40.25, |
| "learning_rate": 2.570071258907363e-07, |
| "logits/chosen": 0.028134455904364586, |
| "logits/rejected": 0.16499164700508118, |
| "logps/chosen": -41.335540771484375, |
| "logps/rejected": -63.30027770996094, |
| "loss": 0.3824, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.4398137331008911, |
| "rewards/margins": 0.9075998067855835, |
| "rewards/rejected": -1.347413420677185, |
| "step": 6256 |
| }, |
| { |
| "epoch": 3.7244655581947743, |
| "grad_norm": 41.75, |
| "learning_rate": 2.551068883610451e-07, |
| "logits/chosen": 0.016426438465714455, |
| "logits/rejected": 0.13184240460395813, |
| "logps/chosen": -40.241729736328125, |
| "logps/rejected": -61.8122673034668, |
| "loss": 0.3795, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.35463353991508484, |
| "rewards/margins": 0.9521807432174683, |
| "rewards/rejected": -1.3068143129348755, |
| "step": 6272 |
| }, |
| { |
| "epoch": 3.7339667458432304, |
| "grad_norm": 50.5, |
| "learning_rate": 2.532066508313539e-07, |
| "logits/chosen": 0.028402097523212433, |
| "logits/rejected": 0.19204524159431458, |
| "logps/chosen": -41.228050231933594, |
| "logps/rejected": -63.15939712524414, |
| "loss": 0.3886, |
| "rewards/accuracies": 0.8984375, |
| "rewards/chosen": -0.4359303116798401, |
| "rewards/margins": 0.902795135974884, |
| "rewards/rejected": -1.3387255668640137, |
| "step": 6288 |
| }, |
| { |
| "epoch": 3.7434679334916865, |
| "grad_norm": 49.75, |
| "learning_rate": 2.513064133016627e-07, |
| "logits/chosen": 0.03932081162929535, |
| "logits/rejected": 0.220359206199646, |
| "logps/chosen": -39.12273025512695, |
| "logps/rejected": -63.374549865722656, |
| "loss": 0.384, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.32400816679000854, |
| "rewards/margins": 0.9151921272277832, |
| "rewards/rejected": -1.239200234413147, |
| "step": 6304 |
| }, |
| { |
| "epoch": 3.7529691211401426, |
| "grad_norm": 35.25, |
| "learning_rate": 2.494061757719715e-07, |
| "logits/chosen": 0.0723482072353363, |
| "logits/rejected": 0.16996870934963226, |
| "logps/chosen": -42.35596466064453, |
| "logps/rejected": -63.216426849365234, |
| "loss": 0.3757, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.47743964195251465, |
| "rewards/margins": 0.9289887547492981, |
| "rewards/rejected": -1.4064284563064575, |
| "step": 6320 |
| }, |
| { |
| "epoch": 3.7624703087885987, |
| "grad_norm": 51.25, |
| "learning_rate": 2.475059382422803e-07, |
| "logits/chosen": 0.08452095091342926, |
| "logits/rejected": 0.16239051520824432, |
| "logps/chosen": -42.04669189453125, |
| "logps/rejected": -65.5211181640625, |
| "loss": 0.3721, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.42908474802970886, |
| "rewards/margins": 0.9674755930900574, |
| "rewards/rejected": -1.3965604305267334, |
| "step": 6336 |
| }, |
| { |
| "epoch": 3.7719714964370548, |
| "grad_norm": 39.5, |
| "learning_rate": 2.456057007125891e-07, |
| "logits/chosen": 0.028654370456933975, |
| "logits/rejected": 0.0740758553147316, |
| "logps/chosen": -41.6036262512207, |
| "logps/rejected": -61.02503967285156, |
| "loss": 0.409, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.42152711749076843, |
| "rewards/margins": 0.8335566520690918, |
| "rewards/rejected": -1.2550837993621826, |
| "step": 6352 |
| }, |
| { |
| "epoch": 3.781472684085511, |
| "grad_norm": 34.75, |
| "learning_rate": 2.437054631828979e-07, |
| "logits/chosen": 0.05283776670694351, |
| "logits/rejected": 0.15333330631256104, |
| "logps/chosen": -40.32767868041992, |
| "logps/rejected": -63.946720123291016, |
| "loss": 0.3505, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3486955463886261, |
| "rewards/margins": 1.0453296899795532, |
| "rewards/rejected": -1.394025444984436, |
| "step": 6368 |
| }, |
| { |
| "epoch": 3.790973871733967, |
| "grad_norm": 44.25, |
| "learning_rate": 2.418052256532066e-07, |
| "logits/chosen": 0.030136309564113617, |
| "logits/rejected": 0.09111961722373962, |
| "logps/chosen": -43.44806671142578, |
| "logps/rejected": -64.65709686279297, |
| "loss": 0.3773, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.48328760266304016, |
| "rewards/margins": 0.9284934997558594, |
| "rewards/rejected": -1.4117810726165771, |
| "step": 6384 |
| }, |
| { |
| "epoch": 3.800475059382423, |
| "grad_norm": 69.5, |
| "learning_rate": 2.399049881235154e-07, |
| "logits/chosen": -0.005254952237010002, |
| "logits/rejected": 0.14121603965759277, |
| "logps/chosen": -39.49591827392578, |
| "logps/rejected": -61.99911880493164, |
| "loss": 0.3777, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.3417325019836426, |
| "rewards/margins": 0.9448993802070618, |
| "rewards/rejected": -1.2866318225860596, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.809976247030879, |
| "grad_norm": 59.75, |
| "learning_rate": 2.380047505938242e-07, |
| "logits/chosen": 0.11867545545101166, |
| "logits/rejected": 0.2170010209083557, |
| "logps/chosen": -40.522804260253906, |
| "logps/rejected": -64.37654876708984, |
| "loss": 0.3459, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": -0.36430996656417847, |
| "rewards/margins": 1.050180435180664, |
| "rewards/rejected": -1.4144904613494873, |
| "step": 6416 |
| }, |
| { |
| "epoch": 3.8194774346793348, |
| "grad_norm": 46.75, |
| "learning_rate": 2.36104513064133e-07, |
| "logits/chosen": 0.09830942749977112, |
| "logits/rejected": 0.1840059906244278, |
| "logps/chosen": -40.83268356323242, |
| "logps/rejected": -63.659610748291016, |
| "loss": 0.3769, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.4339376389980316, |
| "rewards/margins": 0.9776801466941833, |
| "rewards/rejected": -1.4116177558898926, |
| "step": 6432 |
| }, |
| { |
| "epoch": 3.828978622327791, |
| "grad_norm": 54.0, |
| "learning_rate": 2.342042755344418e-07, |
| "logits/chosen": 0.03068104013800621, |
| "logits/rejected": 0.1621265709400177, |
| "logps/chosen": -43.22722625732422, |
| "logps/rejected": -66.03999328613281, |
| "loss": 0.4005, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.4799574315547943, |
| "rewards/margins": 0.8937349915504456, |
| "rewards/rejected": -1.373692512512207, |
| "step": 6448 |
| }, |
| { |
| "epoch": 3.838479809976247, |
| "grad_norm": 50.0, |
| "learning_rate": 2.323040380047506e-07, |
| "logits/chosen": 0.058121610432863235, |
| "logits/rejected": 0.12576644122600555, |
| "logps/chosen": -41.369140625, |
| "logps/rejected": -59.46518325805664, |
| "loss": 0.3966, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.40906572341918945, |
| "rewards/margins": 0.8850200176239014, |
| "rewards/rejected": -1.2940857410430908, |
| "step": 6464 |
| }, |
| { |
| "epoch": 3.847980997624703, |
| "grad_norm": 52.75, |
| "learning_rate": 2.3040380047505937e-07, |
| "logits/chosen": 0.06827502697706223, |
| "logits/rejected": 0.14090177416801453, |
| "logps/chosen": -42.05507278442383, |
| "logps/rejected": -63.853248596191406, |
| "loss": 0.3718, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.40036866068840027, |
| "rewards/margins": 0.9455254077911377, |
| "rewards/rejected": -1.3458940982818604, |
| "step": 6480 |
| }, |
| { |
| "epoch": 3.857482185273159, |
| "grad_norm": 43.75, |
| "learning_rate": 2.2850356294536814e-07, |
| "logits/chosen": 0.029515203088521957, |
| "logits/rejected": 0.14883099496364594, |
| "logps/chosen": -41.896156311035156, |
| "logps/rejected": -64.60520935058594, |
| "loss": 0.3798, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.389555424451828, |
| "rewards/margins": 0.9199389815330505, |
| "rewards/rejected": -1.3094943761825562, |
| "step": 6496 |
| }, |
| { |
| "epoch": 3.866983372921615, |
| "grad_norm": 39.75, |
| "learning_rate": 2.2660332541567694e-07, |
| "logits/chosen": 0.005287522915750742, |
| "logits/rejected": 0.15816916525363922, |
| "logps/chosen": -41.17705535888672, |
| "logps/rejected": -63.35408401489258, |
| "loss": 0.4071, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -0.4392343759536743, |
| "rewards/margins": 0.8662595748901367, |
| "rewards/rejected": -1.305493950843811, |
| "step": 6512 |
| }, |
| { |
| "epoch": 3.8764845605700713, |
| "grad_norm": 46.0, |
| "learning_rate": 2.2470308788598574e-07, |
| "logits/chosen": 0.07539123296737671, |
| "logits/rejected": 0.12418274581432343, |
| "logps/chosen": -41.912052154541016, |
| "logps/rejected": -64.26847076416016, |
| "loss": 0.3873, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.4488585293292999, |
| "rewards/margins": 0.9296143054962158, |
| "rewards/rejected": -1.3784728050231934, |
| "step": 6528 |
| }, |
| { |
| "epoch": 3.8859857482185274, |
| "grad_norm": 47.5, |
| "learning_rate": 2.2280285035629453e-07, |
| "logits/chosen": 0.04668428376317024, |
| "logits/rejected": 0.13457974791526794, |
| "logps/chosen": -40.552486419677734, |
| "logps/rejected": -63.59270477294922, |
| "loss": 0.371, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.3902091681957245, |
| "rewards/margins": 0.9766399264335632, |
| "rewards/rejected": -1.3668489456176758, |
| "step": 6544 |
| }, |
| { |
| "epoch": 3.8954869358669835, |
| "grad_norm": 51.75, |
| "learning_rate": 2.209026128266033e-07, |
| "logits/chosen": 0.019167862832546234, |
| "logits/rejected": 0.10863950103521347, |
| "logps/chosen": -40.27218246459961, |
| "logps/rejected": -63.5718994140625, |
| "loss": 0.355, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.400563508272171, |
| "rewards/margins": 1.0269160270690918, |
| "rewards/rejected": -1.4274795055389404, |
| "step": 6560 |
| }, |
| { |
| "epoch": 3.9049881235154396, |
| "grad_norm": 40.5, |
| "learning_rate": 2.190023752969121e-07, |
| "logits/chosen": 0.032968372106552124, |
| "logits/rejected": 0.1309332251548767, |
| "logps/chosen": -39.79338073730469, |
| "logps/rejected": -63.8279914855957, |
| "loss": 0.3562, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.40201592445373535, |
| "rewards/margins": 1.0143156051635742, |
| "rewards/rejected": -1.4163316488265991, |
| "step": 6576 |
| }, |
| { |
| "epoch": 3.9144893111638956, |
| "grad_norm": 42.75, |
| "learning_rate": 2.171021377672209e-07, |
| "logits/chosen": 0.06808724999427795, |
| "logits/rejected": 0.1723027527332306, |
| "logps/chosen": -39.6837272644043, |
| "logps/rejected": -63.00306701660156, |
| "loss": 0.3484, |
| "rewards/accuracies": 0.9453125, |
| "rewards/chosen": -0.3555365800857544, |
| "rewards/margins": 1.008614420890808, |
| "rewards/rejected": -1.3641510009765625, |
| "step": 6592 |
| }, |
| { |
| "epoch": 3.9239904988123513, |
| "grad_norm": 49.0, |
| "learning_rate": 2.1520190023752967e-07, |
| "logits/chosen": 0.08254537731409073, |
| "logits/rejected": 0.11683779954910278, |
| "logps/chosen": -40.13639450073242, |
| "logps/rejected": -59.88142776489258, |
| "loss": 0.3912, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.36244481801986694, |
| "rewards/margins": 0.8833273649215698, |
| "rewards/rejected": -1.245772123336792, |
| "step": 6608 |
| }, |
| { |
| "epoch": 3.9334916864608074, |
| "grad_norm": 55.25, |
| "learning_rate": 2.1330166270783847e-07, |
| "logits/chosen": 0.017773086205124855, |
| "logits/rejected": 0.12275815010070801, |
| "logps/chosen": -41.630889892578125, |
| "logps/rejected": -64.5182876586914, |
| "loss": 0.3651, |
| "rewards/accuracies": 0.9140625, |
| "rewards/chosen": -0.39863353967666626, |
| "rewards/margins": 0.9960266351699829, |
| "rewards/rejected": -1.3946601152420044, |
| "step": 6624 |
| }, |
| { |
| "epoch": 3.9429928741092635, |
| "grad_norm": 44.25, |
| "learning_rate": 2.1140142517814726e-07, |
| "logits/chosen": 0.027539458125829697, |
| "logits/rejected": 0.1340925693511963, |
| "logps/chosen": -40.22187042236328, |
| "logps/rejected": -62.83479690551758, |
| "loss": 0.3761, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.42398306727409363, |
| "rewards/margins": 0.9288855195045471, |
| "rewards/rejected": -1.352868676185608, |
| "step": 6640 |
| }, |
| { |
| "epoch": 3.9524940617577196, |
| "grad_norm": 61.75, |
| "learning_rate": 2.0950118764845603e-07, |
| "logits/chosen": 0.031753845512866974, |
| "logits/rejected": 0.13772860169410706, |
| "logps/chosen": -41.15782928466797, |
| "logps/rejected": -64.47724914550781, |
| "loss": 0.3647, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.38045597076416016, |
| "rewards/margins": 1.0232834815979004, |
| "rewards/rejected": -1.4037394523620605, |
| "step": 6656 |
| }, |
| { |
| "epoch": 3.9619952494061756, |
| "grad_norm": 57.25, |
| "learning_rate": 2.0760095011876483e-07, |
| "logits/chosen": 0.0039275167509913445, |
| "logits/rejected": 0.13760821521282196, |
| "logps/chosen": -39.61132049560547, |
| "logps/rejected": -63.1502685546875, |
| "loss": 0.3637, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.3538341522216797, |
| "rewards/margins": 0.979717493057251, |
| "rewards/rejected": -1.3335516452789307, |
| "step": 6672 |
| }, |
| { |
| "epoch": 3.9714964370546317, |
| "grad_norm": 58.5, |
| "learning_rate": 2.0570071258907363e-07, |
| "logits/chosen": -0.005867550149559975, |
| "logits/rejected": 0.10856153070926666, |
| "logps/chosen": -40.953468322753906, |
| "logps/rejected": -64.8403091430664, |
| "loss": 0.3771, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.37854593992233276, |
| "rewards/margins": 0.982690155506134, |
| "rewards/rejected": -1.3612359762191772, |
| "step": 6688 |
| }, |
| { |
| "epoch": 3.980997624703088, |
| "grad_norm": 46.0, |
| "learning_rate": 2.0380047505938242e-07, |
| "logits/chosen": 0.07198520749807358, |
| "logits/rejected": 0.0809374749660492, |
| "logps/chosen": -40.85484313964844, |
| "logps/rejected": -63.39386749267578, |
| "loss": 0.379, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.4362155795097351, |
| "rewards/margins": 0.933577835559845, |
| "rewards/rejected": -1.3697935342788696, |
| "step": 6704 |
| }, |
| { |
| "epoch": 3.990498812351544, |
| "grad_norm": 57.5, |
| "learning_rate": 2.0190023752969122e-07, |
| "logits/chosen": 0.05437842011451721, |
| "logits/rejected": 0.1582392305135727, |
| "logps/chosen": -41.71647262573242, |
| "logps/rejected": -66.48292541503906, |
| "loss": 0.3764, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.4557511806488037, |
| "rewards/margins": 0.9482683539390564, |
| "rewards/rejected": -1.4040195941925049, |
| "step": 6720 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 65.5, |
| "learning_rate": 2e-07, |
| "logits/chosen": 0.005993685685098171, |
| "logits/rejected": 0.11152346432209015, |
| "logps/chosen": -41.31070327758789, |
| "logps/rejected": -63.83424758911133, |
| "loss": 0.3716, |
| "rewards/accuracies": 0.9296875, |
| "rewards/chosen": -0.42573827505111694, |
| "rewards/margins": 0.946398138999939, |
| "rewards/rejected": -1.3721364736557007, |
| "step": 6736 |
| } |
| ], |
| "logging_steps": 16, |
| "max_steps": 8420, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|