| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.986221294363257, |
| "eval_steps": 500, |
| "global_step": 447, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006680584551148226, |
| "grad_norm": 0.3030683696269989, |
| "learning_rate": 0.0001599980242003563, |
| "logits/chosen": 0.23207515478134155, |
| "logits/rejected": 0.5862225890159607, |
| "logps/chosen": -172.75270080566406, |
| "logps/rejected": -94.1683120727539, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.013361169102296452, |
| "grad_norm": 0.25661855936050415, |
| "learning_rate": 0.00015999209689901978, |
| "logits/chosen": 0.250579833984375, |
| "logits/rejected": 0.654366672039032, |
| "logps/chosen": -170.5427703857422, |
| "logps/rejected": -94.58740234375, |
| "loss": 0.6682, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": 0.057050298899412155, |
| "rewards/margins": 0.05467259883880615, |
| "rewards/rejected": 0.0023776949383318424, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.020041753653444676, |
| "grad_norm": 0.3576202988624573, |
| "learning_rate": 0.00015998221838876944, |
| "logits/chosen": 0.2386956363916397, |
| "logits/rejected": 0.585329532623291, |
| "logps/chosen": -165.6811981201172, |
| "logps/rejected": -128.58689880371094, |
| "loss": 0.6185, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.1190996840596199, |
| "rewards/margins": 0.12703385949134827, |
| "rewards/rejected": -0.007934181950986385, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.026722338204592903, |
| "grad_norm": 0.3903304636478424, |
| "learning_rate": 0.00015996838915755424, |
| "logits/chosen": 0.2555890679359436, |
| "logits/rejected": 0.6079045534133911, |
| "logps/chosen": -192.27386474609375, |
| "logps/rejected": -115.53170013427734, |
| "loss": 0.5817, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": 0.24757207930088043, |
| "rewards/margins": 0.26733314990997314, |
| "rewards/rejected": -0.0197611041367054, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.033402922755741124, |
| "grad_norm": 0.3334524929523468, |
| "learning_rate": 0.0001599506098884689, |
| "logits/chosen": 0.12432317435741425, |
| "logits/rejected": 0.5883125066757202, |
| "logps/chosen": -182.6539306640625, |
| "logps/rejected": -128.27572631835938, |
| "loss": 0.5411, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.3643491268157959, |
| "rewards/margins": 0.40698686242103577, |
| "rewards/rejected": -0.04263775050640106, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.04008350730688935, |
| "grad_norm": 0.32326623797416687, |
| "learning_rate": 0.00015992888145972026, |
| "logits/chosen": 0.2656134366989136, |
| "logits/rejected": 0.6499578952789307, |
| "logps/chosen": -178.80812072753906, |
| "logps/rejected": -128.95472717285156, |
| "loss": 0.5078, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": 0.5447542071342468, |
| "rewards/margins": 0.5555691719055176, |
| "rewards/rejected": -0.010814988985657692, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.04676409185803758, |
| "grad_norm": 0.31647607684135437, |
| "learning_rate": 0.00015990320494458385, |
| "logits/chosen": 0.2741105556488037, |
| "logits/rejected": 0.6408557891845703, |
| "logps/chosen": -173.88148498535156, |
| "logps/rejected": -119.0029525756836, |
| "loss": 0.4447, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.6545026302337646, |
| "rewards/margins": 0.7013029456138611, |
| "rewards/rejected": -0.046800337731838226, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.05344467640918581, |
| "grad_norm": 0.32367509603500366, |
| "learning_rate": 0.00015987358161135095, |
| "logits/chosen": 0.30019262433052063, |
| "logits/rejected": 0.8890558481216431, |
| "logps/chosen": -213.016357421875, |
| "logps/rejected": -108.3148422241211, |
| "loss": 0.4205, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.7475125193595886, |
| "rewards/margins": 0.8885018825531006, |
| "rewards/rejected": -0.14098937809467316, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.06012526096033403, |
| "grad_norm": 0.2946716248989105, |
| "learning_rate": 0.00015984001292326582, |
| "logits/chosen": -0.012724779546260834, |
| "logits/rejected": 0.8889679908752441, |
| "logps/chosen": -254.97061157226562, |
| "logps/rejected": -110.16943359375, |
| "loss": 0.302, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0533387660980225, |
| "rewards/margins": 1.3181082010269165, |
| "rewards/rejected": -0.2647695243358612, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.06680584551148225, |
| "grad_norm": 0.3166823089122772, |
| "learning_rate": 0.0001598025005384535, |
| "logits/chosen": 0.42483431100845337, |
| "logits/rejected": 0.5721423029899597, |
| "logps/chosen": -147.58172607421875, |
| "logps/rejected": -136.1343536376953, |
| "loss": 0.3091, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": 1.1222333908081055, |
| "rewards/margins": 1.2369006872177124, |
| "rewards/rejected": -0.11466731131076813, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.07348643006263048, |
| "grad_norm": 0.29748114943504333, |
| "learning_rate": 0.0001597610463098379, |
| "logits/chosen": 0.12727132439613342, |
| "logits/rejected": 0.7985308170318604, |
| "logps/chosen": -216.18954467773438, |
| "logps/rejected": -112.87439727783203, |
| "loss": 0.2774, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": 1.1798498630523682, |
| "rewards/margins": 1.6763423681259155, |
| "rewards/rejected": -0.49649256467819214, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0801670146137787, |
| "grad_norm": 0.31257691979408264, |
| "learning_rate": 0.0001597156522850503, |
| "logits/chosen": 0.4580366611480713, |
| "logits/rejected": 0.707663357257843, |
| "logps/chosen": -148.07301330566406, |
| "logps/rejected": -148.95001220703125, |
| "loss": 0.2414, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 1.0007773637771606, |
| "rewards/margins": 1.4732861518859863, |
| "rewards/rejected": -0.4725087881088257, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.08684759916492693, |
| "grad_norm": 0.22815896570682526, |
| "learning_rate": 0.0001596663207063281, |
| "logits/chosen": 0.146785706281662, |
| "logits/rejected": 0.705903172492981, |
| "logps/chosen": -187.23133850097656, |
| "logps/rejected": -118.27559661865234, |
| "loss": 0.1546, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": 1.2817703485488892, |
| "rewards/margins": 2.2400412559509277, |
| "rewards/rejected": -0.9582710266113281, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.09352818371607516, |
| "grad_norm": 0.2425857037305832, |
| "learning_rate": 0.00015961305401040424, |
| "logits/chosen": 0.1981484740972519, |
| "logits/rejected": 1.001242995262146, |
| "logps/chosen": -239.64752197265625, |
| "logps/rejected": -113.02178192138672, |
| "loss": 0.152, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": 1.2698098421096802, |
| "rewards/margins": 2.5462355613708496, |
| "rewards/rejected": -1.2764257192611694, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.10020876826722339, |
| "grad_norm": 0.26788046956062317, |
| "learning_rate": 0.00015955585482838668, |
| "logits/chosen": 0.3972240686416626, |
| "logits/rejected": 0.6294059753417969, |
| "logps/chosen": -179.3887481689453, |
| "logps/rejected": -178.17100524902344, |
| "loss": 0.137, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": 1.4892669916152954, |
| "rewards/margins": 2.444074869155884, |
| "rewards/rejected": -0.9548079967498779, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.10688935281837161, |
| "grad_norm": 0.23353877663612366, |
| "learning_rate": 0.0001594947259856285, |
| "logits/chosen": 0.27605149149894714, |
| "logits/rejected": 0.8782747387886047, |
| "logps/chosen": -206.30886840820312, |
| "logps/rejected": -148.47564697265625, |
| "loss": 0.1094, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5769964456558228, |
| "rewards/margins": 3.015477180480957, |
| "rewards/rejected": -1.4384808540344238, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.11356993736951983, |
| "grad_norm": 0.1707775592803955, |
| "learning_rate": 0.00015942967050158835, |
| "logits/chosen": 0.40737321972846985, |
| "logits/rejected": 0.7035366296768188, |
| "logps/chosen": -146.5780029296875, |
| "logps/rejected": -146.81468200683594, |
| "loss": 0.0745, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6468758583068848, |
| "rewards/margins": 3.2545251846313477, |
| "rewards/rejected": -1.6076490879058838, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.12025052192066805, |
| "grad_norm": 0.14162331819534302, |
| "learning_rate": 0.0001593606915896813, |
| "logits/chosen": 0.42682021856307983, |
| "logits/rejected": 0.8648378849029541, |
| "logps/chosen": -172.6350555419922, |
| "logps/rejected": -148.40040588378906, |
| "loss": 0.0609, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.636667013168335, |
| "rewards/margins": 3.53716778755188, |
| "rewards/rejected": -1.9005005359649658, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.1269311064718163, |
| "grad_norm": 0.14806503057479858, |
| "learning_rate": 0.00015928779265712004, |
| "logits/chosen": 0.46036726236343384, |
| "logits/rejected": 0.6379547119140625, |
| "logps/chosen": -177.5619659423828, |
| "logps/rejected": -199.31591796875, |
| "loss": 0.057, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.8629982471466064, |
| "rewards/margins": 3.892867088317871, |
| "rewards/rejected": -2.0298686027526855, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.1336116910229645, |
| "grad_norm": 0.077659972012043, |
| "learning_rate": 0.00015921097730474672, |
| "logits/chosen": 0.18374812602996826, |
| "logits/rejected": 1.1149046421051025, |
| "logps/chosen": -188.0821990966797, |
| "logps/rejected": -119.98029327392578, |
| "loss": 0.0272, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9101481437683105, |
| "rewards/margins": 4.403899192810059, |
| "rewards/rejected": -2.49375057220459, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.14029227557411272, |
| "grad_norm": 0.16159941256046295, |
| "learning_rate": 0.000159130249326855, |
| "logits/chosen": 0.3764076828956604, |
| "logits/rejected": 0.7519434094429016, |
| "logps/chosen": -125.24600219726562, |
| "logps/rejected": -124.57368469238281, |
| "loss": 0.0331, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4406747817993164, |
| "rewards/margins": 5.011131286621094, |
| "rewards/rejected": -2.5704567432403564, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.14697286012526095, |
| "grad_norm": 0.08939576894044876, |
| "learning_rate": 0.00015904561271100261, |
| "logits/chosen": 0.384420245885849, |
| "logits/rejected": 0.878984272480011, |
| "logps/chosen": -201.50851440429688, |
| "logps/rejected": -164.9197998046875, |
| "loss": 0.0237, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3332247734069824, |
| "rewards/margins": 4.973855972290039, |
| "rewards/rejected": -2.6406304836273193, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.15365344467640918, |
| "grad_norm": 0.05668778717517853, |
| "learning_rate": 0.00015895707163781446, |
| "logits/chosen": 0.34759849309921265, |
| "logits/rejected": 0.7577741742134094, |
| "logps/chosen": -176.2628936767578, |
| "logps/rejected": -150.50823974609375, |
| "loss": 0.0153, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4216699600219727, |
| "rewards/margins": 5.509364128112793, |
| "rewards/rejected": -3.0876946449279785, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.1603340292275574, |
| "grad_norm": 0.07947131991386414, |
| "learning_rate": 0.00015886463048077603, |
| "logits/chosen": 0.36052000522613525, |
| "logits/rejected": 0.8884449005126953, |
| "logps/chosen": -168.50161743164062, |
| "logps/rejected": -148.0092010498047, |
| "loss": 0.0131, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5387940406799316, |
| "rewards/margins": 5.77685546875, |
| "rewards/rejected": -3.2380619049072266, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.16701461377870563, |
| "grad_norm": 0.11461421847343445, |
| "learning_rate": 0.0001587682938060175, |
| "logits/chosen": 0.6103401184082031, |
| "logits/rejected": 0.9240366220474243, |
| "logps/chosen": -148.28834533691406, |
| "logps/rejected": -149.85528564453125, |
| "loss": 0.0189, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.372836112976074, |
| "rewards/margins": 6.2512006759643555, |
| "rewards/rejected": -3.8783645629882812, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.17369519832985386, |
| "grad_norm": 0.10785702615976334, |
| "learning_rate": 0.00015866806637208802, |
| "logits/chosen": 0.5904795527458191, |
| "logits/rejected": 0.8780027031898499, |
| "logps/chosen": -167.06285095214844, |
| "logps/rejected": -168.36965942382812, |
| "loss": 0.0131, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": 2.5616817474365234, |
| "rewards/margins": 6.0948076248168945, |
| "rewards/rejected": -3.533125400543213, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.1803757828810021, |
| "grad_norm": 0.01879948377609253, |
| "learning_rate": 0.0001585639531297208, |
| "logits/chosen": 0.4085114598274231, |
| "logits/rejected": 0.7017614841461182, |
| "logps/chosen": -182.0274200439453, |
| "logps/rejected": -219.75445556640625, |
| "loss": 0.0052, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3172097206115723, |
| "rewards/margins": 6.493253231048584, |
| "rewards/rejected": -4.176044464111328, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.18705636743215032, |
| "grad_norm": 0.0552218034863472, |
| "learning_rate": 0.00015845595922158858, |
| "logits/chosen": 0.4179205596446991, |
| "logits/rejected": 0.8640294075012207, |
| "logps/chosen": -163.02249145507812, |
| "logps/rejected": -173.25906372070312, |
| "loss": 0.008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.3230319023132324, |
| "rewards/margins": 6.0196757316589355, |
| "rewards/rejected": -3.696643829345703, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.19373695198329854, |
| "grad_norm": 0.020818833261728287, |
| "learning_rate": 0.0001583440899820494, |
| "logits/chosen": 0.37070727348327637, |
| "logits/rejected": 0.8000531196594238, |
| "logps/chosen": -153.59396362304688, |
| "logps/rejected": -157.2525634765625, |
| "loss": 0.0051, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.583785057067871, |
| "rewards/margins": 6.723965644836426, |
| "rewards/rejected": -4.140180587768555, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.20041753653444677, |
| "grad_norm": 0.022928601130843163, |
| "learning_rate": 0.00015822835093688343, |
| "logits/chosen": 0.37007975578308105, |
| "logits/rejected": 0.6836897730827332, |
| "logps/chosen": -200.75411987304688, |
| "logps/rejected": -197.43679809570312, |
| "loss": 0.0048, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.2626662254333496, |
| "rewards/margins": 6.830512046813965, |
| "rewards/rejected": -4.567845821380615, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.207098121085595, |
| "grad_norm": 0.017020611092448235, |
| "learning_rate": 0.00015810874780301971, |
| "logits/chosen": 0.21970444917678833, |
| "logits/rejected": 0.9238815307617188, |
| "logps/chosen": -153.52171325683594, |
| "logps/rejected": -130.63345336914062, |
| "loss": 0.0039, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.0072364807128906, |
| "rewards/margins": 6.51116418838501, |
| "rewards/rejected": -4.503928184509277, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.21377870563674323, |
| "grad_norm": 0.05750842019915581, |
| "learning_rate": 0.000157985286488254, |
| "logits/chosen": 0.29518210887908936, |
| "logits/rejected": 0.8406018018722534, |
| "logps/chosen": -179.242919921875, |
| "logps/rejected": -190.29794311523438, |
| "loss": 0.0136, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4710826873779297, |
| "rewards/margins": 7.065474033355713, |
| "rewards/rejected": -4.594390392303467, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.22045929018789143, |
| "grad_norm": 0.012446871027350426, |
| "learning_rate": 0.00015785797309095684, |
| "logits/chosen": 0.45642712712287903, |
| "logits/rejected": 0.922579288482666, |
| "logps/chosen": -174.11033630371094, |
| "logps/rejected": -154.8909149169922, |
| "loss": 0.0031, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4129574298858643, |
| "rewards/margins": 7.372279167175293, |
| "rewards/rejected": -4.959321975708008, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.22713987473903965, |
| "grad_norm": 0.016282295808196068, |
| "learning_rate": 0.00015772681389977238, |
| "logits/chosen": 0.18731066584587097, |
| "logits/rejected": 0.7407510280609131, |
| "logps/chosen": -175.7775421142578, |
| "logps/rejected": -211.89059448242188, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7385621070861816, |
| "rewards/margins": 8.315619468688965, |
| "rewards/rejected": -5.577057361602783, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.23382045929018788, |
| "grad_norm": 0.060803525149822235, |
| "learning_rate": 0.00015759181539330767, |
| "logits/chosen": 0.18983310461044312, |
| "logits/rejected": 0.9841374754905701, |
| "logps/chosen": -193.72940063476562, |
| "logps/rejected": -158.21124267578125, |
| "loss": 0.0069, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.655390501022339, |
| "rewards/margins": 7.861417293548584, |
| "rewards/rejected": -5.206027030944824, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2405010438413361, |
| "grad_norm": 0.014457982033491135, |
| "learning_rate": 0.0001574529842398127, |
| "logits/chosen": 0.35429176688194275, |
| "logits/rejected": 0.8492611050605774, |
| "logps/chosen": -193.26327514648438, |
| "logps/rejected": -194.78515625, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4261109828948975, |
| "rewards/margins": 7.886996269226074, |
| "rewards/rejected": -5.460885524749756, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.24718162839248434, |
| "grad_norm": 0.014244547113776207, |
| "learning_rate": 0.00015731032729685116, |
| "logits/chosen": 0.5617164969444275, |
| "logits/rejected": 0.648545503616333, |
| "logps/chosen": -139.7461700439453, |
| "logps/rejected": -200.63014221191406, |
| "loss": 0.0021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.470811367034912, |
| "rewards/margins": 7.392573833465576, |
| "rewards/rejected": -4.921762466430664, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.2538622129436326, |
| "grad_norm": 0.019575441256165504, |
| "learning_rate": 0.0001571638516109614, |
| "logits/chosen": 0.5932599306106567, |
| "logits/rejected": 0.6393769979476929, |
| "logps/chosen": -131.13328552246094, |
| "logps/rejected": -227.80514526367188, |
| "loss": 0.0019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6353516578674316, |
| "rewards/margins": 8.202313423156738, |
| "rewards/rejected": -5.566961288452148, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.2605427974947808, |
| "grad_norm": 0.012481000274419785, |
| "learning_rate": 0.00015701356441730864, |
| "logits/chosen": 0.3244459331035614, |
| "logits/rejected": 0.839286208152771, |
| "logps/chosen": -183.51742553710938, |
| "logps/rejected": -186.67271423339844, |
| "loss": 0.002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.6852662563323975, |
| "rewards/margins": 8.163836479187012, |
| "rewards/rejected": -5.478569984436035, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.267223382045929, |
| "grad_norm": 0.0029381215572357178, |
| "learning_rate": 0.00015685947313932744, |
| "logits/chosen": 0.4828110635280609, |
| "logits/rejected": 0.9766800999641418, |
| "logps/chosen": -176.82705688476562, |
| "logps/rejected": -181.6543731689453, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.744213819503784, |
| "rewards/margins": 8.242889404296875, |
| "rewards/rejected": -5.498675346374512, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2739039665970772, |
| "grad_norm": 0.04062644764780998, |
| "learning_rate": 0.00015670158538835517, |
| "logits/chosen": 0.3162868022918701, |
| "logits/rejected": 1.0131462812423706, |
| "logps/chosen": -176.71170043945312, |
| "logps/rejected": -139.9007110595703, |
| "loss": 0.0038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.691941976547241, |
| "rewards/margins": 8.195215225219727, |
| "rewards/rejected": -5.5032734870910645, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.28058455114822545, |
| "grad_norm": 0.007433526683598757, |
| "learning_rate": 0.00015653990896325587, |
| "logits/chosen": 0.14636696875095367, |
| "logits/rejected": 0.8240803480148315, |
| "logps/chosen": -202.27883911132812, |
| "logps/rejected": -203.40887451171875, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.634958267211914, |
| "rewards/margins": 8.168269157409668, |
| "rewards/rejected": -5.533310890197754, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.2872651356993737, |
| "grad_norm": 0.007627190090715885, |
| "learning_rate": 0.00015637445185003504, |
| "logits/chosen": 0.6393700838088989, |
| "logits/rejected": 0.8101121783256531, |
| "logps/chosen": -132.88674926757812, |
| "logps/rejected": -221.85333251953125, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.062117576599121, |
| "rewards/margins": 8.964439392089844, |
| "rewards/rejected": -5.902321815490723, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.2939457202505219, |
| "grad_norm": 0.054270580410957336, |
| "learning_rate": 0.00015620522222144543, |
| "logits/chosen": 0.43403318524360657, |
| "logits/rejected": 1.0408865213394165, |
| "logps/chosen": -193.0514678955078, |
| "logps/rejected": -174.92593383789062, |
| "loss": 0.0087, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.494380235671997, |
| "rewards/margins": 8.408964157104492, |
| "rewards/rejected": -5.914583206176758, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.30062630480167013, |
| "grad_norm": 0.0030595879070460796, |
| "learning_rate": 0.00015603222843658292, |
| "logits/chosen": 0.22928494215011597, |
| "logits/rejected": 1.1178843975067139, |
| "logps/chosen": -190.24087524414062, |
| "logps/rejected": -148.67306518554688, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1091623306274414, |
| "rewards/margins": 8.748310089111328, |
| "rewards/rejected": -5.6391472816467285, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.30730688935281836, |
| "grad_norm": 0.016060445457696915, |
| "learning_rate": 0.00015585547904047405, |
| "logits/chosen": 0.34157004952430725, |
| "logits/rejected": 0.8962047696113586, |
| "logps/chosen": -220.03871154785156, |
| "logps/rejected": -241.0406494140625, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8193817138671875, |
| "rewards/margins": 9.287981986999512, |
| "rewards/rejected": -6.468600273132324, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.3139874739039666, |
| "grad_norm": 0.025302594527602196, |
| "learning_rate": 0.00015567498276365365, |
| "logits/chosen": 0.25260353088378906, |
| "logits/rejected": 1.1292093992233276, |
| "logps/chosen": -183.6875, |
| "logps/rejected": -163.08114624023438, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.396714687347412, |
| "rewards/margins": 8.208773612976074, |
| "rewards/rejected": -5.812058925628662, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.3206680584551148, |
| "grad_norm": 0.010507023893296719, |
| "learning_rate": 0.00015549074852173368, |
| "logits/chosen": 0.3296445608139038, |
| "logits/rejected": 1.195299506187439, |
| "logps/chosen": -186.541748046875, |
| "logps/rejected": -154.01535034179688, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7873411178588867, |
| "rewards/margins": 8.289080619812012, |
| "rewards/rejected": -5.501739501953125, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.32734864300626304, |
| "grad_norm": 0.00997474230825901, |
| "learning_rate": 0.00015530278541496292, |
| "logits/chosen": 0.3625122308731079, |
| "logits/rejected": 1.0217641592025757, |
| "logps/chosen": -140.2364959716797, |
| "logps/rejected": -147.66769409179688, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.462085723876953, |
| "rewards/margins": 8.511833190917969, |
| "rewards/rejected": -6.049746990203857, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.33402922755741127, |
| "grad_norm": 0.012138652615249157, |
| "learning_rate": 0.00015511110272777733, |
| "logits/chosen": 0.5160237550735474, |
| "logits/rejected": 0.9644973874092102, |
| "logps/chosen": -168.16012573242188, |
| "logps/rejected": -173.67738342285156, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5186946392059326, |
| "rewards/margins": 8.781152725219727, |
| "rewards/rejected": -6.262458801269531, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3407098121085595, |
| "grad_norm": 0.013811074197292328, |
| "learning_rate": 0.00015491570992834155, |
| "logits/chosen": 0.45710933208465576, |
| "logits/rejected": 0.9616823196411133, |
| "logps/chosen": -134.65382385253906, |
| "logps/rejected": -165.4757537841797, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9232048988342285, |
| "rewards/margins": 8.802949905395508, |
| "rewards/rejected": -5.879745006561279, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.3473903966597077, |
| "grad_norm": 0.003540891222655773, |
| "learning_rate": 0.00015471661666808116, |
| "logits/chosen": 0.5379828214645386, |
| "logits/rejected": 0.9311552047729492, |
| "logps/chosen": -166.2515869140625, |
| "logps/rejected": -226.817626953125, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7226719856262207, |
| "rewards/margins": 9.967523574829102, |
| "rewards/rejected": -7.244852542877197, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.35407098121085595, |
| "grad_norm": 0.046603377908468246, |
| "learning_rate": 0.00015451383278120595, |
| "logits/chosen": 0.482962429523468, |
| "logits/rejected": 0.8294156789779663, |
| "logps/chosen": -173.58953857421875, |
| "logps/rejected": -164.0615692138672, |
| "loss": 0.0038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1282169818878174, |
| "rewards/margins": 8.867436408996582, |
| "rewards/rejected": -5.739219665527344, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.3607515657620042, |
| "grad_norm": 0.02518213540315628, |
| "learning_rate": 0.00015430736828422423, |
| "logits/chosen": 0.2752196192741394, |
| "logits/rejected": 1.1645457744598389, |
| "logps/chosen": -199.2365264892578, |
| "logps/rejected": -145.12139892578125, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.693560838699341, |
| "rewards/margins": 8.52237319946289, |
| "rewards/rejected": -5.828812599182129, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.3674321503131524, |
| "grad_norm": 0.043834567070007324, |
| "learning_rate": 0.00015409723337544802, |
| "logits/chosen": 0.4665435254573822, |
| "logits/rejected": 0.968167781829834, |
| "logps/chosen": -175.5590362548828, |
| "logps/rejected": -201.70126342773438, |
| "loss": 0.0038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8582777976989746, |
| "rewards/margins": 9.58537769317627, |
| "rewards/rejected": -6.727100372314453, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.37411273486430063, |
| "grad_norm": 0.02259964495897293, |
| "learning_rate": 0.0001538834384344892, |
| "logits/chosen": 0.26438379287719727, |
| "logits/rejected": 0.9069167375564575, |
| "logps/chosen": -190.77597045898438, |
| "logps/rejected": -203.91160583496094, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2503395080566406, |
| "rewards/margins": 9.630882263183594, |
| "rewards/rejected": -6.380542278289795, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.38079331941544886, |
| "grad_norm": 0.00614876439794898, |
| "learning_rate": 0.00015366599402174703, |
| "logits/chosen": 0.19551321864128113, |
| "logits/rejected": 1.087761640548706, |
| "logps/chosen": -226.3440399169922, |
| "logps/rejected": -211.6017608642578, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.834458112716675, |
| "rewards/margins": 9.265405654907227, |
| "rewards/rejected": -6.430947303771973, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3874739039665971, |
| "grad_norm": 0.0061728451400995255, |
| "learning_rate": 0.00015344491087788633, |
| "logits/chosen": 0.3328951299190521, |
| "logits/rejected": 0.9735016822814941, |
| "logps/chosen": -172.78604125976562, |
| "logps/rejected": -179.15667724609375, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.01570725440979, |
| "rewards/margins": 9.582061767578125, |
| "rewards/rejected": -6.566355228424072, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3941544885177453, |
| "grad_norm": 0.0012235452886670828, |
| "learning_rate": 0.00015322019992330702, |
| "logits/chosen": 0.4755726158618927, |
| "logits/rejected": 0.8112475872039795, |
| "logps/chosen": -154.75039672851562, |
| "logps/rejected": -201.81065368652344, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7610487937927246, |
| "rewards/margins": 9.628297805786133, |
| "rewards/rejected": -6.86724853515625, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.40083507306889354, |
| "grad_norm": 0.027884479612112045, |
| "learning_rate": 0.00015299187225760469, |
| "logits/chosen": 0.17077681422233582, |
| "logits/rejected": 0.9662021398544312, |
| "logps/chosen": -171.18963623046875, |
| "logps/rejected": -194.4243927001953, |
| "loss": 0.0027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.726745128631592, |
| "rewards/margins": 8.980852127075195, |
| "rewards/rejected": -6.254107475280762, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.40751565762004177, |
| "grad_norm": 0.012179279699921608, |
| "learning_rate": 0.00015275993915902234, |
| "logits/chosen": 0.3292789161205292, |
| "logits/rejected": 0.7503560185432434, |
| "logps/chosen": -175.94473266601562, |
| "logps/rejected": -210.44918823242188, |
| "loss": 0.0012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.879286289215088, |
| "rewards/margins": 9.31381607055664, |
| "rewards/rejected": -6.434530258178711, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.41419624217119, |
| "grad_norm": 0.0661226138472557, |
| "learning_rate": 0.00015252441208389334, |
| "logits/chosen": 0.48564329743385315, |
| "logits/rejected": 0.6276620030403137, |
| "logps/chosen": -124.23971557617188, |
| "logps/rejected": -221.02198791503906, |
| "loss": 0.0085, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": 2.9825873374938965, |
| "rewards/margins": 9.826833724975586, |
| "rewards/rejected": -6.844245910644531, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.4208768267223382, |
| "grad_norm": 0.004125866107642651, |
| "learning_rate": 0.00015228530266607547, |
| "logits/chosen": 0.27578750252723694, |
| "logits/rejected": 1.1573642492294312, |
| "logps/chosen": -197.88607788085938, |
| "logps/rejected": -168.08474731445312, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4376907348632812, |
| "rewards/margins": 9.881976127624512, |
| "rewards/rejected": -6.444284915924072, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.42755741127348645, |
| "grad_norm": 0.003275892697274685, |
| "learning_rate": 0.00015204262271637626, |
| "logits/chosen": 0.6008961796760559, |
| "logits/rejected": 1.0901458263397217, |
| "logps/chosen": -185.306396484375, |
| "logps/rejected": -202.59896850585938, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3544652462005615, |
| "rewards/margins": 9.659427642822266, |
| "rewards/rejected": -6.304962158203125, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.4342379958246347, |
| "grad_norm": 0.006054996512830257, |
| "learning_rate": 0.00015179638422196966, |
| "logits/chosen": 0.6647149324417114, |
| "logits/rejected": 1.1419864892959595, |
| "logps/chosen": -122.66973876953125, |
| "logps/rejected": -164.39332580566406, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.812810182571411, |
| "rewards/margins": 8.833253860473633, |
| "rewards/rejected": -6.020443439483643, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.44091858037578285, |
| "grad_norm": 0.10174047201871872, |
| "learning_rate": 0.00015154659934580396, |
| "logits/chosen": 0.3900471329689026, |
| "logits/rejected": 0.7904311418533325, |
| "logps/chosen": -191.52008056640625, |
| "logps/rejected": -219.46322631835938, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0001416206359863, |
| "rewards/margins": 9.584066390991211, |
| "rewards/rejected": -6.583923816680908, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.4475991649269311, |
| "grad_norm": 0.024550337344408035, |
| "learning_rate": 0.00015129328042600085, |
| "logits/chosen": 0.33518272638320923, |
| "logits/rejected": 0.980431079864502, |
| "logps/chosen": -177.56134033203125, |
| "logps/rejected": -182.8128662109375, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8862996101379395, |
| "rewards/margins": 10.160045623779297, |
| "rewards/rejected": -7.273746490478516, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.4542797494780793, |
| "grad_norm": 0.003426956245675683, |
| "learning_rate": 0.00015103643997524613, |
| "logits/chosen": 0.4200201630592346, |
| "logits/rejected": 0.9040957689285278, |
| "logps/chosen": -223.8280487060547, |
| "logps/rejected": -291.58636474609375, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0368287563323975, |
| "rewards/margins": 10.13497543334961, |
| "rewards/rejected": -7.098146438598633, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.46096033402922754, |
| "grad_norm": 0.003953616600483656, |
| "learning_rate": 0.00015077609068017158, |
| "logits/chosen": 0.6471998691558838, |
| "logits/rejected": 1.0458431243896484, |
| "logps/chosen": -170.61874389648438, |
| "logps/rejected": -188.6359100341797, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.938751220703125, |
| "rewards/margins": 9.758254051208496, |
| "rewards/rejected": -6.819502830505371, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.46764091858037576, |
| "grad_norm": 0.004953494295477867, |
| "learning_rate": 0.00015051224540072833, |
| "logits/chosen": 0.43476998805999756, |
| "logits/rejected": 0.8195680379867554, |
| "logps/chosen": -158.63465881347656, |
| "logps/rejected": -213.90623474121094, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9456708431243896, |
| "rewards/margins": 9.627756118774414, |
| "rewards/rejected": -6.68208646774292, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.474321503131524, |
| "grad_norm": 0.017343413084745407, |
| "learning_rate": 0.00015024491716955155, |
| "logits/chosen": 0.5195021629333496, |
| "logits/rejected": 0.8773779273033142, |
| "logps/chosen": -136.5732879638672, |
| "logps/rejected": -194.87118530273438, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0906143188476562, |
| "rewards/margins": 9.986026763916016, |
| "rewards/rejected": -6.895412445068359, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.4810020876826722, |
| "grad_norm": 0.008785056881606579, |
| "learning_rate": 0.00014997411919131688, |
| "logits/chosen": 0.5617407560348511, |
| "logits/rejected": 0.8960329294204712, |
| "logps/chosen": -163.903076171875, |
| "logps/rejected": -235.6993408203125, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1653547286987305, |
| "rewards/margins": 9.831454277038574, |
| "rewards/rejected": -6.666099548339844, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.48768267223382045, |
| "grad_norm": 0.008580000139772892, |
| "learning_rate": 0.00014969986484208804, |
| "logits/chosen": 0.44829118251800537, |
| "logits/rejected": 0.797157347202301, |
| "logps/chosen": -136.55909729003906, |
| "logps/rejected": -190.699951171875, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.086763381958008, |
| "rewards/margins": 9.880305290222168, |
| "rewards/rejected": -6.79354190826416, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.4943632567849687, |
| "grad_norm": 0.0018130596727132797, |
| "learning_rate": 0.0001494221676686562, |
| "logits/chosen": 0.3543122410774231, |
| "logits/rejected": 0.8907362222671509, |
| "logps/chosen": -172.5308837890625, |
| "logps/rejected": -191.48504638671875, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.289935350418091, |
| "rewards/margins": 9.893710136413574, |
| "rewards/rejected": -6.603774547576904, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.5010438413361169, |
| "grad_norm": 0.009158944711089134, |
| "learning_rate": 0.00014914104138787066, |
| "logits/chosen": 0.2868165373802185, |
| "logits/rejected": 1.0966495275497437, |
| "logps/chosen": -196.56903076171875, |
| "logps/rejected": -199.89080810546875, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9512715339660645, |
| "rewards/margins": 9.58918571472168, |
| "rewards/rejected": -6.637913703918457, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5077244258872652, |
| "grad_norm": 0.07474283874034882, |
| "learning_rate": 0.0001488564998859617, |
| "logits/chosen": 0.3425526022911072, |
| "logits/rejected": 0.9148508310317993, |
| "logps/chosen": -136.7733917236328, |
| "logps/rejected": -164.56271362304688, |
| "loss": 0.0027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.152519941329956, |
| "rewards/margins": 9.47095775604248, |
| "rewards/rejected": -6.318437576293945, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.5144050104384134, |
| "grad_norm": 0.009138119406998158, |
| "learning_rate": 0.00014856855721785415, |
| "logits/chosen": 0.40808552503585815, |
| "logits/rejected": 1.0737789869308472, |
| "logps/chosen": -209.9980926513672, |
| "logps/rejected": -198.7295379638672, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3080811500549316, |
| "rewards/margins": 10.055679321289062, |
| "rewards/rejected": -6.747599124908447, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.5210855949895616, |
| "grad_norm": 0.014706511981785297, |
| "learning_rate": 0.0001482772276064736, |
| "logits/chosen": 0.3385770916938782, |
| "logits/rejected": 0.9576174020767212, |
| "logps/chosen": -195.38328552246094, |
| "logps/rejected": -244.77413940429688, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3216552734375, |
| "rewards/margins": 10.63253402709961, |
| "rewards/rejected": -7.310879230499268, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.5277661795407098, |
| "grad_norm": 0.08255796134471893, |
| "learning_rate": 0.00014798252544204361, |
| "logits/chosen": 0.6491683125495911, |
| "logits/rejected": 0.956652045249939, |
| "logps/chosen": -123.93269348144531, |
| "logps/rejected": -199.91995239257812, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1648573875427246, |
| "rewards/margins": 10.019696235656738, |
| "rewards/rejected": -6.854838848114014, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.534446764091858, |
| "grad_norm": 0.007771719712764025, |
| "learning_rate": 0.00014768446528137493, |
| "logits/chosen": 0.4897231459617615, |
| "logits/rejected": 1.0199142694473267, |
| "logps/chosen": -155.5507049560547, |
| "logps/rejected": -182.4271697998047, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8543262481689453, |
| "rewards/margins": 9.827340126037598, |
| "rewards/rejected": -6.9730143547058105, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5411273486430063, |
| "grad_norm": 0.0030905655585229397, |
| "learning_rate": 0.00014738306184714658, |
| "logits/chosen": 0.38616618514060974, |
| "logits/rejected": 1.0583640336990356, |
| "logps/chosen": -181.91766357421875, |
| "logps/rejected": -216.1337890625, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.401458501815796, |
| "rewards/margins": 10.563034057617188, |
| "rewards/rejected": -7.1615753173828125, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.5478079331941544, |
| "grad_norm": 0.0021012630313634872, |
| "learning_rate": 0.0001470783300271785, |
| "logits/chosen": 0.2328442931175232, |
| "logits/rejected": 0.9704186916351318, |
| "logps/chosen": -179.87081909179688, |
| "logps/rejected": -180.01779174804688, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.238947868347168, |
| "rewards/margins": 9.60936164855957, |
| "rewards/rejected": -6.370413780212402, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.5544885177453027, |
| "grad_norm": 0.04149683937430382, |
| "learning_rate": 0.0001467702848736962, |
| "logits/chosen": 0.34571874141693115, |
| "logits/rejected": 0.7980022430419922, |
| "logps/chosen": -121.0694580078125, |
| "logps/rejected": -168.23104858398438, |
| "loss": 0.0035, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.338881254196167, |
| "rewards/margins": 10.039016723632812, |
| "rewards/rejected": -6.700134754180908, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.5611691022964509, |
| "grad_norm": 0.019571220502257347, |
| "learning_rate": 0.0001464589416025873, |
| "logits/chosen": 0.46891170740127563, |
| "logits/rejected": 0.9267268180847168, |
| "logps/chosen": -137.26438903808594, |
| "logps/rejected": -200.45758056640625, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.055349349975586, |
| "rewards/margins": 10.106170654296875, |
| "rewards/rejected": -7.050821304321289, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.5678496868475992, |
| "grad_norm": 0.003243156708776951, |
| "learning_rate": 0.00014614431559264993, |
| "logits/chosen": 0.5420711636543274, |
| "logits/rejected": 0.8642836213111877, |
| "logps/chosen": -153.92691040039062, |
| "logps/rejected": -197.93504333496094, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.184312105178833, |
| "rewards/margins": 10.237167358398438, |
| "rewards/rejected": -7.052854537963867, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.5745302713987473, |
| "grad_norm": 0.0025193586479872465, |
| "learning_rate": 0.00014582642238483302, |
| "logits/chosen": 0.39913490414619446, |
| "logits/rejected": 1.0773463249206543, |
| "logps/chosen": -165.12124633789062, |
| "logps/rejected": -179.71060180664062, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0881643295288086, |
| "rewards/margins": 10.198201179504395, |
| "rewards/rejected": -7.110036849975586, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.5812108559498956, |
| "grad_norm": 0.0010882590431720018, |
| "learning_rate": 0.00014550527768146876, |
| "logits/chosen": 0.26313066482543945, |
| "logits/rejected": 1.0774075984954834, |
| "logps/chosen": -205.03379821777344, |
| "logps/rejected": -198.33372497558594, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.51377272605896, |
| "rewards/margins": 10.64950180053711, |
| "rewards/rejected": -7.13572883605957, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.5878914405010438, |
| "grad_norm": 0.0017716643633320928, |
| "learning_rate": 0.0001451808973454969, |
| "logits/chosen": 0.45582157373428345, |
| "logits/rejected": 1.0374003648757935, |
| "logps/chosen": -170.92657470703125, |
| "logps/rejected": -203.78793334960938, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.345822811126709, |
| "rewards/margins": 10.260464668273926, |
| "rewards/rejected": -6.914642810821533, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.5945720250521921, |
| "grad_norm": 0.0024310583248734474, |
| "learning_rate": 0.0001448532973996812, |
| "logits/chosen": 0.2980806827545166, |
| "logits/rejected": 1.233319878578186, |
| "logps/chosen": -184.93099975585938, |
| "logps/rejected": -159.75274658203125, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6145825386047363, |
| "rewards/margins": 9.834606170654297, |
| "rewards/rejected": -6.220023155212402, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.6012526096033403, |
| "grad_norm": 0.0589352585375309, |
| "learning_rate": 0.00014452249402581818, |
| "logits/chosen": 0.43372777104377747, |
| "logits/rejected": 1.0277624130249023, |
| "logps/chosen": -192.56484985351562, |
| "logps/rejected": -211.60171508789062, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5561540126800537, |
| "rewards/margins": 10.808131217956543, |
| "rewards/rejected": -7.251977920532227, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6079331941544885, |
| "grad_norm": 0.0015669305576011539, |
| "learning_rate": 0.00014418850356393744, |
| "logits/chosen": 0.4860686659812927, |
| "logits/rejected": 0.8380767107009888, |
| "logps/chosen": -135.60870361328125, |
| "logps/rejected": -176.65524291992188, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0018136501312256, |
| "rewards/margins": 9.445623397827148, |
| "rewards/rejected": -6.443809509277344, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.6146137787056367, |
| "grad_norm": 0.0015854910016059875, |
| "learning_rate": 0.0001438513425114949, |
| "logits/chosen": 0.24818000197410583, |
| "logits/rejected": 1.0943326950073242, |
| "logps/chosen": -241.65411376953125, |
| "logps/rejected": -227.60733032226562, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6303694248199463, |
| "rewards/margins": 10.740053176879883, |
| "rewards/rejected": -7.109683990478516, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.621294363256785, |
| "grad_norm": 0.06920412927865982, |
| "learning_rate": 0.00014351102752255778, |
| "logits/chosen": 0.4860806465148926, |
| "logits/rejected": 0.9092923402786255, |
| "logps/chosen": -194.13475036621094, |
| "logps/rejected": -225.56948852539062, |
| "loss": 0.0041, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6053314208984375, |
| "rewards/margins": 10.585766792297363, |
| "rewards/rejected": -6.980436325073242, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.6279749478079332, |
| "grad_norm": 0.08025512099266052, |
| "learning_rate": 0.00014316757540698186, |
| "logits/chosen": 0.7010557651519775, |
| "logits/rejected": 0.9352781772613525, |
| "logps/chosen": -147.06973266601562, |
| "logps/rejected": -196.08779907226562, |
| "loss": 0.0038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9433255195617676, |
| "rewards/margins": 9.722973823547363, |
| "rewards/rejected": -6.779648303985596, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.6346555323590815, |
| "grad_norm": 0.008386881090700626, |
| "learning_rate": 0.0001428210031295814, |
| "logits/chosen": 0.6669158339500427, |
| "logits/rejected": 1.0123670101165771, |
| "logps/chosen": -142.99716186523438, |
| "logps/rejected": -229.2365264892578, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2911269664764404, |
| "rewards/margins": 10.677725791931152, |
| "rewards/rejected": -7.386599063873291, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.6413361169102296, |
| "grad_norm": 0.01323284488171339, |
| "learning_rate": 0.00014247132780929091, |
| "logits/chosen": 0.5180701613426208, |
| "logits/rejected": 1.1706314086914062, |
| "logps/chosen": -155.06080627441406, |
| "logps/rejected": -180.8107452392578, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.032214641571045, |
| "rewards/margins": 10.255094528198242, |
| "rewards/rejected": -7.2228803634643555, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.6480167014613779, |
| "grad_norm": 0.006948838476091623, |
| "learning_rate": 0.00014211856671831973, |
| "logits/chosen": 0.4092825651168823, |
| "logits/rejected": 0.9364159107208252, |
| "logps/chosen": -182.318603515625, |
| "logps/rejected": -217.2329864501953, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8276522159576416, |
| "rewards/margins": 10.5891695022583, |
| "rewards/rejected": -7.76151704788208, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.6546972860125261, |
| "grad_norm": 0.006203535012900829, |
| "learning_rate": 0.00014176273728129879, |
| "logits/chosen": 0.38271790742874146, |
| "logits/rejected": 0.9502532482147217, |
| "logps/chosen": -154.72279357910156, |
| "logps/rejected": -199.6136016845703, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.429482936859131, |
| "rewards/margins": 10.404837608337402, |
| "rewards/rejected": -6.9753546714782715, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.6613778705636744, |
| "grad_norm": 0.13446320593357086, |
| "learning_rate": 0.00014140385707442002, |
| "logits/chosen": 0.5989990234375, |
| "logits/rejected": 0.9662174582481384, |
| "logps/chosen": -157.0098114013672, |
| "logps/rejected": -225.11416625976562, |
| "loss": 0.0122, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3204352855682373, |
| "rewards/margins": 10.797783851623535, |
| "rewards/rejected": -7.477348804473877, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.6680584551148225, |
| "grad_norm": 0.01066310703754425, |
| "learning_rate": 0.000141041943824568, |
| "logits/chosen": 0.4070190191268921, |
| "logits/rejected": 0.9769365191459656, |
| "logps/chosen": -178.2224884033203, |
| "logps/rejected": -192.3170928955078, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1867244243621826, |
| "rewards/margins": 10.239897727966309, |
| "rewards/rejected": -7.053173065185547, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6747390396659708, |
| "grad_norm": 0.004759893286973238, |
| "learning_rate": 0.00014067701540844443, |
| "logits/chosen": 0.38733264803886414, |
| "logits/rejected": 0.9725967049598694, |
| "logps/chosen": -163.582763671875, |
| "logps/rejected": -200.43734741210938, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2934625148773193, |
| "rewards/margins": 10.841292381286621, |
| "rewards/rejected": -7.547830104827881, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.681419624217119, |
| "grad_norm": 0.06473153084516525, |
| "learning_rate": 0.00014030908985168528, |
| "logits/chosen": 0.4140828251838684, |
| "logits/rejected": 1.0431495904922485, |
| "logps/chosen": -152.635986328125, |
| "logps/rejected": -180.54466247558594, |
| "loss": 0.0021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3867475986480713, |
| "rewards/margins": 10.397819519042969, |
| "rewards/rejected": -7.011071681976318, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.6881002087682673, |
| "grad_norm": 0.041281044483184814, |
| "learning_rate": 0.00013993818532797008, |
| "logits/chosen": 0.5268477201461792, |
| "logits/rejected": 0.9111321568489075, |
| "logps/chosen": -168.5952911376953, |
| "logps/rejected": -216.87393188476562, |
| "loss": 0.0021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4760243892669678, |
| "rewards/margins": 10.898151397705078, |
| "rewards/rejected": -7.422128677368164, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.6947807933194154, |
| "grad_norm": 0.025793112814426422, |
| "learning_rate": 0.0001395643201581245, |
| "logits/chosen": 0.5013325214385986, |
| "logits/rejected": 0.8294863700866699, |
| "logps/chosen": -146.08856201171875, |
| "logps/rejected": -211.77963256835938, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.200756311416626, |
| "rewards/margins": 10.40932559967041, |
| "rewards/rejected": -7.208569526672363, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.7014613778705637, |
| "grad_norm": 0.0016948337433859706, |
| "learning_rate": 0.00013918751280921527, |
| "logits/chosen": 0.3914889395236969, |
| "logits/rejected": 0.7578558325767517, |
| "logps/chosen": -137.25982666015625, |
| "logps/rejected": -199.191650390625, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.24421763420105, |
| "rewards/margins": 10.255487442016602, |
| "rewards/rejected": -7.0112690925598145, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.7081419624217119, |
| "grad_norm": 0.0030184737406671047, |
| "learning_rate": 0.000138807781893638, |
| "logits/chosen": 0.47848889231681824, |
| "logits/rejected": 1.006292462348938, |
| "logps/chosen": -141.8809814453125, |
| "logps/rejected": -167.41851806640625, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.821171998977661, |
| "rewards/margins": 10.16208553314209, |
| "rewards/rejected": -7.34091329574585, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.7148225469728601, |
| "grad_norm": 0.0008359827334061265, |
| "learning_rate": 0.00013842514616819795, |
| "logits/chosen": 0.553492546081543, |
| "logits/rejected": 1.0241787433624268, |
| "logps/chosen": -167.76641845703125, |
| "logps/rejected": -223.5001678466797, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6072874069213867, |
| "rewards/margins": 11.082305908203125, |
| "rewards/rejected": -7.475017070770264, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.7215031315240084, |
| "grad_norm": 0.0023584573063999414, |
| "learning_rate": 0.00013803962453318332, |
| "logits/chosen": 0.4638338088989258, |
| "logits/rejected": 1.1057603359222412, |
| "logps/chosen": -137.7595672607422, |
| "logps/rejected": -139.77247619628906, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4429550170898438, |
| "rewards/margins": 10.039093017578125, |
| "rewards/rejected": -6.596138000488281, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.7281837160751565, |
| "grad_norm": 0.0010937312617897987, |
| "learning_rate": 0.00013765123603143187, |
| "logits/chosen": 0.5443668365478516, |
| "logits/rejected": 1.1449006795883179, |
| "logps/chosen": -195.05966186523438, |
| "logps/rejected": -221.8257598876953, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.427518606185913, |
| "rewards/margins": 10.773955345153809, |
| "rewards/rejected": -7.346436023712158, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.7348643006263048, |
| "grad_norm": 0.005583143327385187, |
| "learning_rate": 0.00013725999984739014, |
| "logits/chosen": 0.5298680067062378, |
| "logits/rejected": 0.9729467034339905, |
| "logps/chosen": -179.80484008789062, |
| "logps/rejected": -245.03363037109375, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2088518142700195, |
| "rewards/margins": 10.993154525756836, |
| "rewards/rejected": -7.784302711486816, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.741544885177453, |
| "grad_norm": 0.002138708718121052, |
| "learning_rate": 0.000136865935306166, |
| "logits/chosen": 0.3394404649734497, |
| "logits/rejected": 1.089263916015625, |
| "logps/chosen": -182.08953857421875, |
| "logps/rejected": -191.70120239257812, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5019545555114746, |
| "rewards/margins": 10.58714485168457, |
| "rewards/rejected": -7.0851898193359375, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.7482254697286013, |
| "grad_norm": 0.0015473664971068501, |
| "learning_rate": 0.00013646906187257392, |
| "logits/chosen": 0.2853686213493347, |
| "logits/rejected": 1.1013542413711548, |
| "logps/chosen": -190.174560546875, |
| "logps/rejected": -198.12901306152344, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5565602779388428, |
| "rewards/margins": 11.150199890136719, |
| "rewards/rejected": -7.593639373779297, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.7549060542797494, |
| "grad_norm": 0.0007681334391236305, |
| "learning_rate": 0.00013606939915017366, |
| "logits/chosen": 0.5230981111526489, |
| "logits/rejected": 1.0380409955978394, |
| "logps/chosen": -145.13079833984375, |
| "logps/rejected": -166.50582885742188, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.462148427963257, |
| "rewards/margins": 10.77736759185791, |
| "rewards/rejected": -7.315218925476074, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.7615866388308977, |
| "grad_norm": 0.0014410597505047917, |
| "learning_rate": 0.00013566696688030176, |
| "logits/chosen": 0.21239212155342102, |
| "logits/rejected": 0.8984243869781494, |
| "logps/chosen": -207.76397705078125, |
| "logps/rejected": -216.22067260742188, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6719069480895996, |
| "rewards/margins": 11.330296516418457, |
| "rewards/rejected": -7.658390045166016, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.7682672233820459, |
| "grad_norm": 0.006789306178689003, |
| "learning_rate": 0.00013526178494109668, |
| "logits/chosen": 0.433882474899292, |
| "logits/rejected": 1.0958993434906006, |
| "logps/chosen": -198.0896453857422, |
| "logps/rejected": -203.357666015625, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.284191370010376, |
| "rewards/margins": 11.191015243530273, |
| "rewards/rejected": -7.906824111938477, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.7749478079331942, |
| "grad_norm": 0.0014478856464847922, |
| "learning_rate": 0.00013485387334651668, |
| "logits/chosen": 0.32002153992652893, |
| "logits/rejected": 1.0241080522537231, |
| "logps/chosen": -151.71717834472656, |
| "logps/rejected": -176.21511840820312, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.194765567779541, |
| "rewards/margins": 10.118311882019043, |
| "rewards/rejected": -6.923546314239502, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.7816283924843423, |
| "grad_norm": 0.008634977042675018, |
| "learning_rate": 0.00013444325224535127, |
| "logits/chosen": 0.43800267577171326, |
| "logits/rejected": 0.780940592288971, |
| "logps/chosen": -107.12379455566406, |
| "logps/rejected": -186.25064086914062, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.841928243637085, |
| "rewards/margins": 10.173989295959473, |
| "rewards/rejected": -7.332061290740967, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.7883089770354906, |
| "grad_norm": 0.0018734281184151769, |
| "learning_rate": 0.00013402994192022622, |
| "logits/chosen": 0.4912789762020111, |
| "logits/rejected": 0.8206208944320679, |
| "logps/chosen": -147.78268432617188, |
| "logps/rejected": -207.8667755126953, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.988457202911377, |
| "rewards/margins": 10.380359649658203, |
| "rewards/rejected": -7.391902446746826, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.7949895615866388, |
| "grad_norm": 0.0015072772512212396, |
| "learning_rate": 0.00013361396278660124, |
| "logits/chosen": 0.3757545053958893, |
| "logits/rejected": 1.0709331035614014, |
| "logps/chosen": -150.7517852783203, |
| "logps/rejected": -183.0996551513672, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7918334007263184, |
| "rewards/margins": 10.951468467712402, |
| "rewards/rejected": -7.159633636474609, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.8016701461377871, |
| "grad_norm": 0.001006263424642384, |
| "learning_rate": 0.00013319533539176199, |
| "logits/chosen": 0.6779341697692871, |
| "logits/rejected": 0.8323581218719482, |
| "logps/chosen": -162.09359741210938, |
| "logps/rejected": -241.7459259033203, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6300947666168213, |
| "rewards/margins": 11.384780883789062, |
| "rewards/rejected": -7.7546868324279785, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8083507306889353, |
| "grad_norm": 0.0022658442612737417, |
| "learning_rate": 0.00013277408041380487, |
| "logits/chosen": 0.7190150022506714, |
| "logits/rejected": 1.058363676071167, |
| "logps/chosen": -142.59063720703125, |
| "logps/rejected": -214.18865966796875, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.351846218109131, |
| "rewards/margins": 10.205248832702637, |
| "rewards/rejected": -6.853403091430664, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.8150313152400835, |
| "grad_norm": 0.0006775332149118185, |
| "learning_rate": 0.0001323502186606158, |
| "logits/chosen": 0.46084725856781006, |
| "logits/rejected": 1.1550092697143555, |
| "logps/chosen": -164.25515747070312, |
| "logps/rejected": -191.22218322753906, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5589873790740967, |
| "rewards/margins": 11.493250846862793, |
| "rewards/rejected": -7.934264183044434, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.8217118997912317, |
| "grad_norm": 0.0040510594844818115, |
| "learning_rate": 0.0001319237710688423, |
| "logits/chosen": 0.19903920590877533, |
| "logits/rejected": 1.0057140588760376, |
| "logps/chosen": -189.11306762695312, |
| "logps/rejected": -211.69046020507812, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7651314735412598, |
| "rewards/margins": 11.223003387451172, |
| "rewards/rejected": -7.457871437072754, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.82839248434238, |
| "grad_norm": 0.04469997435808182, |
| "learning_rate": 0.00013149475870285934, |
| "logits/chosen": 0.5379782319068909, |
| "logits/rejected": 1.0742489099502563, |
| "logps/chosen": -186.49488830566406, |
| "logps/rejected": -206.31185913085938, |
| "loss": 0.0019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.179280996322632, |
| "rewards/margins": 10.138044357299805, |
| "rewards/rejected": -6.9587626457214355, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.8350730688935282, |
| "grad_norm": 0.0012717200443148613, |
| "learning_rate": 0.00013106320275372893, |
| "logits/chosen": 0.5852510333061218, |
| "logits/rejected": 1.0125198364257812, |
| "logps/chosen": -156.62803649902344, |
| "logps/rejected": -211.2324981689453, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.8485283851623535, |
| "rewards/margins": 11.504820823669434, |
| "rewards/rejected": -7.6562933921813965, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.8417536534446765, |
| "grad_norm": 0.01169919315725565, |
| "learning_rate": 0.00013062912453815336, |
| "logits/chosen": 0.6571882367134094, |
| "logits/rejected": 0.9026492238044739, |
| "logps/chosen": -148.38502502441406, |
| "logps/rejected": -236.60284423828125, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.63680362701416, |
| "rewards/margins": 11.450029373168945, |
| "rewards/rejected": -7.813226699829102, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.8484342379958246, |
| "grad_norm": 0.0030007236637175083, |
| "learning_rate": 0.00013019254549742217, |
| "logits/chosen": 0.45202672481536865, |
| "logits/rejected": 1.183500051498413, |
| "logps/chosen": -163.0814971923828, |
| "logps/rejected": -198.718017578125, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5360257625579834, |
| "rewards/margins": 10.435393333435059, |
| "rewards/rejected": -6.899367809295654, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.8551148225469729, |
| "grad_norm": 0.0010185908759012818, |
| "learning_rate": 0.00012975348719635322, |
| "logits/chosen": 0.6836375594139099, |
| "logits/rejected": 1.2013335227966309, |
| "logps/chosen": -171.42530822753906, |
| "logps/rejected": -199.51210021972656, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4464898109436035, |
| "rewards/margins": 11.003561019897461, |
| "rewards/rejected": -7.557069778442383, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.8617954070981211, |
| "grad_norm": 0.0015769846504554152, |
| "learning_rate": 0.00012931197132222738, |
| "logits/chosen": 0.23104141652584076, |
| "logits/rejected": 1.1797983646392822, |
| "logps/chosen": -193.76705932617188, |
| "logps/rejected": -194.70799255371094, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.822854518890381, |
| "rewards/margins": 11.82970905303955, |
| "rewards/rejected": -8.006855010986328, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.8684759916492694, |
| "grad_norm": 0.0023490425664931536, |
| "learning_rate": 0.00012886801968371733, |
| "logits/chosen": 0.9613852500915527, |
| "logits/rejected": 1.0451734066009521, |
| "logps/chosen": -153.423828125, |
| "logps/rejected": -238.90798950195312, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0533547401428223, |
| "rewards/margins": 11.23104190826416, |
| "rewards/rejected": -8.177685737609863, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.8751565762004175, |
| "grad_norm": 0.007329499814659357, |
| "learning_rate": 0.00012842165420981028, |
| "logits/chosen": 0.3598926365375519, |
| "logits/rejected": 1.2872174978256226, |
| "logps/chosen": -184.6826629638672, |
| "logps/rejected": -160.8395233154297, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.779690980911255, |
| "rewards/margins": 10.888734817504883, |
| "rewards/rejected": -7.109042644500732, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.8818371607515657, |
| "grad_norm": 0.0005828408757224679, |
| "learning_rate": 0.00012797289694872483, |
| "logits/chosen": 0.5109158158302307, |
| "logits/rejected": 1.1522945165634155, |
| "logps/chosen": -181.67935180664062, |
| "logps/rejected": -195.45596313476562, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.14690899848938, |
| "rewards/margins": 10.518176078796387, |
| "rewards/rejected": -7.371266841888428, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.888517745302714, |
| "grad_norm": 0.006235368084162474, |
| "learning_rate": 0.00012752177006682193, |
| "logits/chosen": 0.3775724470615387, |
| "logits/rejected": 0.9961695671081543, |
| "logps/chosen": -177.385009765625, |
| "logps/rejected": -198.3971405029297, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.827141046524048, |
| "rewards/margins": 11.361750602722168, |
| "rewards/rejected": -7.534610271453857, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.8951983298538622, |
| "grad_norm": 0.006079867482185364, |
| "learning_rate": 0.00012706829584750989, |
| "logits/chosen": 0.7383792400360107, |
| "logits/rejected": 0.7893859148025513, |
| "logps/chosen": -103.3727798461914, |
| "logps/rejected": -196.89271545410156, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7119593620300293, |
| "rewards/margins": 10.615793228149414, |
| "rewards/rejected": -7.903831958770752, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.9018789144050104, |
| "grad_norm": 0.0013690602499991655, |
| "learning_rate": 0.00012661249669014364, |
| "logits/chosen": 0.6606283187866211, |
| "logits/rejected": 0.932214081287384, |
| "logps/chosen": -162.7115936279297, |
| "logps/rejected": -225.05245971679688, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3914923667907715, |
| "rewards/margins": 11.141890525817871, |
| "rewards/rejected": -7.750399589538574, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.9085594989561586, |
| "grad_norm": 0.00651137251406908, |
| "learning_rate": 0.0001261543951089186, |
| "logits/chosen": 0.31962257623672485, |
| "logits/rejected": 1.2235876321792603, |
| "logps/chosen": -223.0352325439453, |
| "logps/rejected": -209.3647003173828, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4260380268096924, |
| "rewards/margins": 10.789265632629395, |
| "rewards/rejected": -7.363227367401123, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.9152400835073069, |
| "grad_norm": 0.0014856844209134579, |
| "learning_rate": 0.0001256940137317583, |
| "logits/chosen": 0.48557624220848083, |
| "logits/rejected": 0.7384285926818848, |
| "logps/chosen": -142.57444763183594, |
| "logps/rejected": -209.3195037841797, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1010584831237793, |
| "rewards/margins": 10.799681663513184, |
| "rewards/rejected": -7.6986236572265625, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.9219206680584551, |
| "grad_norm": 0.004290735349059105, |
| "learning_rate": 0.00012523137529919673, |
| "logits/chosen": 0.49820947647094727, |
| "logits/rejected": 1.187336802482605, |
| "logps/chosen": -181.5707550048828, |
| "logps/rejected": -202.8837127685547, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.54292893409729, |
| "rewards/margins": 10.928933143615723, |
| "rewards/rejected": -7.3860039710998535, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.9286012526096034, |
| "grad_norm": 0.003620475996285677, |
| "learning_rate": 0.00012476650266325513, |
| "logits/chosen": 0.25639641284942627, |
| "logits/rejected": 1.1532477140426636, |
| "logps/chosen": -194.07142639160156, |
| "logps/rejected": -179.5195770263672, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.796419620513916, |
| "rewards/margins": 10.853509902954102, |
| "rewards/rejected": -7.057089805603027, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.9352818371607515, |
| "grad_norm": 0.007752254139631987, |
| "learning_rate": 0.00012429941878631324, |
| "logits/chosen": 0.5540122389793396, |
| "logits/rejected": 0.8194867372512817, |
| "logps/chosen": -103.79815673828125, |
| "logps/rejected": -197.13775634765625, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.879499912261963, |
| "rewards/margins": 11.494918823242188, |
| "rewards/rejected": -8.615418434143066, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.9419624217118998, |
| "grad_norm": 0.0009465285111218691, |
| "learning_rate": 0.00012383014673997497, |
| "logits/chosen": 0.7288948893547058, |
| "logits/rejected": 0.9930503964424133, |
| "logps/chosen": -141.9916534423828, |
| "logps/rejected": -216.1266326904297, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5408124923706055, |
| "rewards/margins": 11.739241600036621, |
| "rewards/rejected": -8.1984281539917, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.948643006263048, |
| "grad_norm": 0.0011045335559174418, |
| "learning_rate": 0.00012335870970392888, |
| "logits/chosen": 0.5019521117210388, |
| "logits/rejected": 1.1962378025054932, |
| "logps/chosen": -200.11459350585938, |
| "logps/rejected": -179.86489868164062, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4744818210601807, |
| "rewards/margins": 11.273345947265625, |
| "rewards/rejected": -7.798864841461182, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.9553235908141963, |
| "grad_norm": 0.024925533682107925, |
| "learning_rate": 0.0001228851309648032, |
| "logits/chosen": 0.6946430802345276, |
| "logits/rejected": 0.8380041122436523, |
| "logps/chosen": -127.19657135009766, |
| "logps/rejected": -245.40231323242188, |
| "loss": 0.0034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5805623531341553, |
| "rewards/margins": 12.349920272827148, |
| "rewards/rejected": -8.769356727600098, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.9620041753653444, |
| "grad_norm": 0.0006273960461840034, |
| "learning_rate": 0.0001224094339150155, |
| "logits/chosen": 0.32063305377960205, |
| "logits/rejected": 1.1136928796768188, |
| "logps/chosen": -182.1404266357422, |
| "logps/rejected": -192.31224060058594, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.556812047958374, |
| "rewards/margins": 11.931962966918945, |
| "rewards/rejected": -8.375152587890625, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.9686847599164927, |
| "grad_norm": 0.013907409273087978, |
| "learning_rate": 0.0001219316420516173, |
| "logits/chosen": 0.3725297451019287, |
| "logits/rejected": 1.098531723022461, |
| "logps/chosen": -166.97886657714844, |
| "logps/rejected": -225.6486358642578, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5993576049804688, |
| "rewards/margins": 11.865777015686035, |
| "rewards/rejected": -8.266420364379883, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.9753653444676409, |
| "grad_norm": 0.008556756190955639, |
| "learning_rate": 0.00012145177897513349, |
| "logits/chosen": 0.4221380949020386, |
| "logits/rejected": 1.1614536046981812, |
| "logps/chosen": -148.64877319335938, |
| "logps/rejected": -174.18954467773438, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.369095802307129, |
| "rewards/margins": 10.837663650512695, |
| "rewards/rejected": -7.468567371368408, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.9820459290187892, |
| "grad_norm": 0.01212705485522747, |
| "learning_rate": 0.0001209698683883964, |
| "logits/chosen": 0.2313823103904724, |
| "logits/rejected": 1.3470351696014404, |
| "logps/chosen": -226.58926391601562, |
| "logps/rejected": -192.19192504882812, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.370995044708252, |
| "rewards/margins": 11.233190536499023, |
| "rewards/rejected": -7.8621954917907715, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.9887265135699373, |
| "grad_norm": 0.0021595736034214497, |
| "learning_rate": 0.00012048593409537522, |
| "logits/chosen": 0.33942633867263794, |
| "logits/rejected": 1.0542570352554321, |
| "logps/chosen": -157.76954650878906, |
| "logps/rejected": -191.26931762695312, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.679450273513794, |
| "rewards/margins": 11.785932540893555, |
| "rewards/rejected": -8.106481552124023, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.9954070981210856, |
| "grad_norm": 0.011656565591692924, |
| "learning_rate": 0.00012000000000000002, |
| "logits/chosen": 0.5189492702484131, |
| "logits/rejected": 1.1787470579147339, |
| "logps/chosen": -188.0403289794922, |
| "logps/rejected": -217.3338165283203, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.895737648010254, |
| "rewards/margins": 11.713899612426758, |
| "rewards/rejected": -7.8181633949279785, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.0020876826722338, |
| "grad_norm": 0.0011344770900905132, |
| "learning_rate": 0.00011951209010498108, |
| "logits/chosen": 0.4898693561553955, |
| "logits/rejected": 0.9766267538070679, |
| "logps/chosen": -173.29742431640625, |
| "logps/rejected": -209.09970092773438, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.360459089279175, |
| "rewards/margins": 10.934969902038574, |
| "rewards/rejected": -7.574510097503662, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.008768267223382, |
| "grad_norm": 0.0007721176953054965, |
| "learning_rate": 0.0001190222285106234, |
| "logits/chosen": 0.40774810314178467, |
| "logits/rejected": 1.1688655614852905, |
| "logps/chosen": -172.59530639648438, |
| "logps/rejected": -188.3610076904297, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3076529502868652, |
| "rewards/margins": 11.588592529296875, |
| "rewards/rejected": -8.280941009521484, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.0154488517745304, |
| "grad_norm": 0.0009479824802838266, |
| "learning_rate": 0.00011853043941363599, |
| "logits/chosen": 0.5603910684585571, |
| "logits/rejected": 1.2070255279541016, |
| "logps/chosen": -171.68991088867188, |
| "logps/rejected": -202.64581298828125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.053154706954956, |
| "rewards/margins": 11.318292617797852, |
| "rewards/rejected": -8.265138626098633, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.0221294363256785, |
| "grad_norm": 0.0007491550641134381, |
| "learning_rate": 0.00011803674710593694, |
| "logits/chosen": 0.5270896553993225, |
| "logits/rejected": 0.9994916915893555, |
| "logps/chosen": -143.41436767578125, |
| "logps/rejected": -200.78506469726562, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.390288829803467, |
| "rewards/margins": 11.925804138183594, |
| "rewards/rejected": -8.535514831542969, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.0288100208768267, |
| "grad_norm": 0.0009609659318812191, |
| "learning_rate": 0.00011754117597345342, |
| "logits/chosen": 0.44007253646850586, |
| "logits/rejected": 1.2215617895126343, |
| "logps/chosen": -137.77011108398438, |
| "logps/rejected": -157.72262573242188, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.361788034439087, |
| "rewards/margins": 10.739198684692383, |
| "rewards/rejected": -7.377409934997559, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.0354906054279749, |
| "grad_norm": 0.00042587798088788986, |
| "learning_rate": 0.00011704375049491706, |
| "logits/chosen": 0.5652292370796204, |
| "logits/rejected": 0.945173978805542, |
| "logps/chosen": -145.53587341308594, |
| "logps/rejected": -234.8934326171875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2489700317382812, |
| "rewards/margins": 11.937629699707031, |
| "rewards/rejected": -8.68865966796875, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.0421711899791233, |
| "grad_norm": 0.0036174345295876265, |
| "learning_rate": 0.00011654449524065499, |
| "logits/chosen": 0.6072965860366821, |
| "logits/rejected": 0.9700977802276611, |
| "logps/chosen": -164.41070556640625, |
| "logps/rejected": -208.70692443847656, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4178385734558105, |
| "rewards/margins": 11.409514427185059, |
| "rewards/rejected": -7.991675853729248, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.0488517745302715, |
| "grad_norm": 0.0013750857906416059, |
| "learning_rate": 0.00011604343487137601, |
| "logits/chosen": 0.4669913947582245, |
| "logits/rejected": 0.8744410276412964, |
| "logps/chosen": -136.6684112548828, |
| "logps/rejected": -221.99180603027344, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.308939218521118, |
| "rewards/margins": 11.851880073547363, |
| "rewards/rejected": -8.542940139770508, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.0555323590814196, |
| "grad_norm": 0.0015664240345358849, |
| "learning_rate": 0.00011554059413695259, |
| "logits/chosen": 0.33473482728004456, |
| "logits/rejected": 0.9933146834373474, |
| "logps/chosen": -148.87991333007812, |
| "logps/rejected": -174.86378479003906, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.450471878051758, |
| "rewards/margins": 11.176468849182129, |
| "rewards/rejected": -7.725996494293213, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.0622129436325678, |
| "grad_norm": 0.0005284909857437015, |
| "learning_rate": 0.00011503599787519838, |
| "logits/chosen": 0.5126962065696716, |
| "logits/rejected": 1.1356326341629028, |
| "logps/chosen": -137.66116333007812, |
| "logps/rejected": -207.24697875976562, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.605351448059082, |
| "rewards/margins": 12.428322792053223, |
| "rewards/rejected": -8.82297134399414, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.068893528183716, |
| "grad_norm": 0.0012326446594670415, |
| "learning_rate": 0.00011452967101064118, |
| "logits/chosen": 0.34365200996398926, |
| "logits/rejected": 1.3188669681549072, |
| "logps/chosen": -196.2615509033203, |
| "logps/rejected": -177.1357879638672, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4942498207092285, |
| "rewards/margins": 11.286696434020996, |
| "rewards/rejected": -7.792446136474609, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.0755741127348644, |
| "grad_norm": 0.0008088697213679552, |
| "learning_rate": 0.00011402163855329199, |
| "logits/chosen": 0.6658412218093872, |
| "logits/rejected": 0.9367747902870178, |
| "logps/chosen": -148.8216094970703, |
| "logps/rejected": -237.14321899414062, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0566344261169434, |
| "rewards/margins": 11.411201477050781, |
| "rewards/rejected": -8.35456657409668, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.0822546972860125, |
| "grad_norm": 0.0008485389407724142, |
| "learning_rate": 0.00011351192559740949, |
| "logits/chosen": 0.7383836507797241, |
| "logits/rejected": 1.149275541305542, |
| "logps/chosen": -137.4690704345703, |
| "logps/rejected": -209.24774169921875, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.417785882949829, |
| "rewards/margins": 11.462677001953125, |
| "rewards/rejected": -8.044892311096191, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.0889352818371607, |
| "grad_norm": 0.0005739150219596922, |
| "learning_rate": 0.0001130005573202606, |
| "logits/chosen": 0.5681736469268799, |
| "logits/rejected": 0.9365695714950562, |
| "logps/chosen": -163.19027709960938, |
| "logps/rejected": -242.56704711914062, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6804287433624268, |
| "rewards/margins": 11.32785415649414, |
| "rewards/rejected": -7.647425651550293, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.0956158663883089, |
| "grad_norm": 0.004342256113886833, |
| "learning_rate": 0.00011248755898087684, |
| "logits/chosen": 0.4348772168159485, |
| "logits/rejected": 0.9407061338424683, |
| "logps/chosen": -164.66812133789062, |
| "logps/rejected": -211.21351623535156, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6066393852233887, |
| "rewards/margins": 11.529861450195312, |
| "rewards/rejected": -7.923222541809082, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.1022964509394573, |
| "grad_norm": 0.0008769746054895222, |
| "learning_rate": 0.00011197295591880657, |
| "logits/chosen": 0.6984822750091553, |
| "logits/rejected": 1.179412841796875, |
| "logps/chosen": -115.85931396484375, |
| "logps/rejected": -204.60218811035156, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4029932022094727, |
| "rewards/margins": 11.5657958984375, |
| "rewards/rejected": -8.162801742553711, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.1089770354906054, |
| "grad_norm": 0.052136704325675964, |
| "learning_rate": 0.00011145677355286353, |
| "logits/chosen": 0.35074859857559204, |
| "logits/rejected": 1.297317385673523, |
| "logps/chosen": -179.23977661132812, |
| "logps/rejected": -196.0187530517578, |
| "loss": 0.0041, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2048532962799072, |
| "rewards/margins": 10.744298934936523, |
| "rewards/rejected": -7.539445877075195, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.1156576200417536, |
| "grad_norm": 0.004496046341955662, |
| "learning_rate": 0.00011093903737987102, |
| "logits/chosen": 0.4773109555244446, |
| "logits/rejected": 0.854899525642395, |
| "logps/chosen": -135.96896362304688, |
| "logps/rejected": -217.36251831054688, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.521961212158203, |
| "rewards/margins": 12.137231826782227, |
| "rewards/rejected": -8.61527156829834, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.1223382045929018, |
| "grad_norm": 0.0014830527361482382, |
| "learning_rate": 0.0001104197729734027, |
| "logits/chosen": 0.4430505335330963, |
| "logits/rejected": 1.0172383785247803, |
| "logps/chosen": -130.85362243652344, |
| "logps/rejected": -194.0697479248047, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6902472972869873, |
| "rewards/margins": 11.970958709716797, |
| "rewards/rejected": -8.280712127685547, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.1290187891440502, |
| "grad_norm": 0.0010357052087783813, |
| "learning_rate": 0.00010989900598251933, |
| "logits/chosen": 0.5143346190452576, |
| "logits/rejected": 1.06320321559906, |
| "logps/chosen": -163.08689880371094, |
| "logps/rejected": -201.315673828125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4902918338775635, |
| "rewards/margins": 11.922452926635742, |
| "rewards/rejected": -8.432162284851074, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.1356993736951984, |
| "grad_norm": 0.003559267381206155, |
| "learning_rate": 0.00010937676213050178, |
| "logits/chosen": 0.5508258938789368, |
| "logits/rejected": 0.8938766121864319, |
| "logps/chosen": -177.08377075195312, |
| "logps/rejected": -236.21572875976562, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5201761722564697, |
| "rewards/margins": 11.900284767150879, |
| "rewards/rejected": -8.380107879638672, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.1423799582463465, |
| "grad_norm": 0.0015716857742518187, |
| "learning_rate": 0.00010885306721358045, |
| "logits/chosen": 0.5690699219703674, |
| "logits/rejected": 1.3002761602401733, |
| "logps/chosen": -158.36329650878906, |
| "logps/rejected": -188.37022399902344, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6385347843170166, |
| "rewards/margins": 11.156852722167969, |
| "rewards/rejected": -7.518317699432373, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.1490605427974947, |
| "grad_norm": 0.00707374420017004, |
| "learning_rate": 0.00010832794709966112, |
| "logits/chosen": 0.3383929431438446, |
| "logits/rejected": 1.2127389907836914, |
| "logps/chosen": -209.92825317382812, |
| "logps/rejected": -226.083740234375, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.8767664432525635, |
| "rewards/margins": 12.013040542602539, |
| "rewards/rejected": -8.136273384094238, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.155741127348643, |
| "grad_norm": 0.0012596473097801208, |
| "learning_rate": 0.00010780142772704712, |
| "logits/chosen": 0.6638607978820801, |
| "logits/rejected": 1.1403982639312744, |
| "logps/chosen": -157.58836364746094, |
| "logps/rejected": -212.12307739257812, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.922273635864258, |
| "rewards/margins": 11.61031436920166, |
| "rewards/rejected": -7.688040256500244, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.1624217118997913, |
| "grad_norm": 0.0015774251660332084, |
| "learning_rate": 0.00010727353510315816, |
| "logits/chosen": 0.5496118068695068, |
| "logits/rejected": 1.1589007377624512, |
| "logps/chosen": -145.5523223876953, |
| "logps/rejected": -200.73741149902344, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.641279697418213, |
| "rewards/margins": 12.338908195495605, |
| "rewards/rejected": -8.697628021240234, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.1691022964509394, |
| "grad_norm": 0.015453093685209751, |
| "learning_rate": 0.00010674429530324574, |
| "logits/chosen": 0.5172752141952515, |
| "logits/rejected": 1.1510488986968994, |
| "logps/chosen": -155.7484588623047, |
| "logps/rejected": -177.63027954101562, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.400092363357544, |
| "rewards/margins": 10.950430870056152, |
| "rewards/rejected": -7.5503387451171875, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.1757828810020876, |
| "grad_norm": 0.0012826044112443924, |
| "learning_rate": 0.00010621373446910502, |
| "logits/chosen": 0.5946585536003113, |
| "logits/rejected": 1.1312611103057861, |
| "logps/chosen": -182.75096130371094, |
| "logps/rejected": -220.31317138671875, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6229946613311768, |
| "rewards/margins": 11.344440460205078, |
| "rewards/rejected": -7.7214460372924805, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.182463465553236, |
| "grad_norm": 0.04078257828950882, |
| "learning_rate": 0.00010568187880778373, |
| "logits/chosen": 0.43110349774360657, |
| "logits/rejected": 1.0659563541412354, |
| "logps/chosen": -184.86537170410156, |
| "logps/rejected": -219.26332092285156, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.9776532649993896, |
| "rewards/margins": 12.578085899353027, |
| "rewards/rejected": -8.600433349609375, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.1891440501043842, |
| "grad_norm": 0.0012859440175816417, |
| "learning_rate": 0.00010514875459028743, |
| "logits/chosen": 0.4430413842201233, |
| "logits/rejected": 0.9718153476715088, |
| "logps/chosen": -130.32984924316406, |
| "logps/rejected": -205.24440002441406, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.349815845489502, |
| "rewards/margins": 10.951803207397461, |
| "rewards/rejected": -7.601987361907959, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.1958246346555323, |
| "grad_norm": 0.018363507464528084, |
| "learning_rate": 0.00010461438815028219, |
| "logits/chosen": 0.2948397397994995, |
| "logits/rejected": 0.872904360294342, |
| "logps/chosen": -151.6688690185547, |
| "logps/rejected": -215.8356170654297, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.273361921310425, |
| "rewards/margins": 10.712809562683105, |
| "rewards/rejected": -7.43944787979126, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.2025052192066805, |
| "grad_norm": 0.0017469325102865696, |
| "learning_rate": 0.00010407880588279352, |
| "logits/chosen": 0.4774348735809326, |
| "logits/rejected": 0.7673771977424622, |
| "logps/chosen": -142.30799865722656, |
| "logps/rejected": -235.41400146484375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5183143615722656, |
| "rewards/margins": 12.07205867767334, |
| "rewards/rejected": -8.553744316101074, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.209185803757829, |
| "grad_norm": 0.0007553395698778331, |
| "learning_rate": 0.00010354203424290271, |
| "logits/chosen": 0.7918840050697327, |
| "logits/rejected": 1.1450823545455933, |
| "logps/chosen": -131.16664123535156, |
| "logps/rejected": -211.7412109375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.386751174926758, |
| "rewards/margins": 10.85494327545166, |
| "rewards/rejected": -7.468192100524902, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.215866388308977, |
| "grad_norm": 0.0012744672130793333, |
| "learning_rate": 0.0001030040997444402, |
| "logits/chosen": 0.45629051327705383, |
| "logits/rejected": 0.9976529479026794, |
| "logps/chosen": -133.558349609375, |
| "logps/rejected": -192.5741424560547, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5578696727752686, |
| "rewards/margins": 11.33415699005127, |
| "rewards/rejected": -7.77628755569458, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.2225469728601253, |
| "grad_norm": 0.00047452072612941265, |
| "learning_rate": 0.00010246502895867568, |
| "logits/chosen": 0.8382459878921509, |
| "logits/rejected": 0.7453187108039856, |
| "logps/chosen": -130.78721618652344, |
| "logps/rejected": -261.71197509765625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.311988592147827, |
| "rewards/margins": 12.276436805725098, |
| "rewards/rejected": -8.964447975158691, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.2292275574112734, |
| "grad_norm": 0.0086443442851305, |
| "learning_rate": 0.0001019248485130059, |
| "logits/chosen": 0.4247654676437378, |
| "logits/rejected": 1.1591222286224365, |
| "logps/chosen": -167.55833435058594, |
| "logps/rejected": -179.604248046875, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1582260131835938, |
| "rewards/margins": 11.254338264465332, |
| "rewards/rejected": -8.096114158630371, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.2359081419624216, |
| "grad_norm": 0.001544467406347394, |
| "learning_rate": 0.0001013835850896391, |
| "logits/chosen": 0.5242801308631897, |
| "logits/rejected": 1.4609646797180176, |
| "logps/chosen": -179.2179412841797, |
| "logps/rejected": -167.85665893554688, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4588305950164795, |
| "rewards/margins": 11.103114128112793, |
| "rewards/rejected": -7.64428186416626, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.24258872651357, |
| "grad_norm": 0.0018972865073010325, |
| "learning_rate": 0.00010084126542427725, |
| "logits/chosen": 0.6289225220680237, |
| "logits/rejected": 0.774864137172699, |
| "logps/chosen": -153.0395965576172, |
| "logps/rejected": -283.26702880859375, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.832301616668701, |
| "rewards/margins": 12.483742713928223, |
| "rewards/rejected": -8.65144157409668, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.2492693110647182, |
| "grad_norm": 0.0005702719208784401, |
| "learning_rate": 0.0001002979163047954, |
| "logits/chosen": 0.5563308596611023, |
| "logits/rejected": 1.0048775672912598, |
| "logps/chosen": -160.7752685546875, |
| "logps/rejected": -206.54483032226562, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5111823081970215, |
| "rewards/margins": 11.941852569580078, |
| "rewards/rejected": -8.430670738220215, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.2559498956158663, |
| "grad_norm": 0.0005867581348866224, |
| "learning_rate": 9.975356456991849e-05, |
| "logits/chosen": 0.8396896123886108, |
| "logits/rejected": 1.0179424285888672, |
| "logps/chosen": -144.9906768798828, |
| "logps/rejected": -236.50119018554688, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.715435028076172, |
| "rewards/margins": 11.944793701171875, |
| "rewards/rejected": -8.229358673095703, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.2626304801670147, |
| "grad_norm": 0.00043765606824308634, |
| "learning_rate": 9.920823710789562e-05, |
| "logits/chosen": 0.36594095826148987, |
| "logits/rejected": 1.1739063262939453, |
| "logps/chosen": -170.1721954345703, |
| "logps/rejected": -185.6078643798828, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.8701069355010986, |
| "rewards/margins": 11.88235092163086, |
| "rewards/rejected": -8.01224422454834, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.269311064718163, |
| "grad_norm": 0.00041320393211208284, |
| "learning_rate": 9.866196085517186e-05, |
| "logits/chosen": 0.4379793703556061, |
| "logits/rejected": 1.0098289251327515, |
| "logps/chosen": -138.4647216796875, |
| "logps/rejected": -204.14564514160156, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3668053150177, |
| "rewards/margins": 11.314434051513672, |
| "rewards/rejected": -7.947629928588867, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.275991649269311, |
| "grad_norm": 0.0015216560568660498, |
| "learning_rate": 9.81147627950579e-05, |
| "logits/chosen": 0.5741678476333618, |
| "logits/rejected": 1.0209534168243408, |
| "logps/chosen": -117.83605194091797, |
| "logps/rejected": -161.2305908203125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5719006061553955, |
| "rewards/margins": 11.671441078186035, |
| "rewards/rejected": -8.099540710449219, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.2826722338204593, |
| "grad_norm": 0.0032318232115358114, |
| "learning_rate": 9.756666995639703e-05, |
| "logits/chosen": 0.5298473238945007, |
| "logits/rejected": 1.1004164218902588, |
| "logps/chosen": -155.68045043945312, |
| "logps/rejected": -188.41229248046875, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.450162887573242, |
| "rewards/margins": 11.770234107971191, |
| "rewards/rejected": -8.32007122039795, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.2893528183716074, |
| "grad_norm": 0.0007364507764577866, |
| "learning_rate": 9.701770941223014e-05, |
| "logits/chosen": 0.38831934332847595, |
| "logits/rejected": 0.9996166229248047, |
| "logps/chosen": -149.40911865234375, |
| "logps/rejected": -181.202880859375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.700319290161133, |
| "rewards/margins": 11.73774528503418, |
| "rewards/rejected": -8.037424087524414, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.2960334029227558, |
| "grad_norm": 0.05426819995045662, |
| "learning_rate": 9.646790827845844e-05, |
| "logits/chosen": 0.45380842685699463, |
| "logits/rejected": 1.102933645248413, |
| "logps/chosen": -132.97164916992188, |
| "logps/rejected": -154.08615112304688, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5674822330474854, |
| "rewards/margins": 10.895905494689941, |
| "rewards/rejected": -7.328424453735352, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.302713987473904, |
| "grad_norm": 0.0008307976531796157, |
| "learning_rate": 9.5917293712504e-05, |
| "logits/chosen": 0.4264935255050659, |
| "logits/rejected": 1.022092580795288, |
| "logps/chosen": -174.43560791015625, |
| "logps/rejected": -219.11038208007812, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.602630853652954, |
| "rewards/margins": 12.087784767150879, |
| "rewards/rejected": -8.485153198242188, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.3093945720250522, |
| "grad_norm": 0.005923965014517307, |
| "learning_rate": 9.536589291196837e-05, |
| "logits/chosen": 0.38043221831321716, |
| "logits/rejected": 1.2655529975891113, |
| "logps/chosen": -219.9903106689453, |
| "logps/rejected": -224.3175048828125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.751121997833252, |
| "rewards/margins": 12.251858711242676, |
| "rewards/rejected": -8.500736236572266, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.3160751565762006, |
| "grad_norm": 0.008259938098490238, |
| "learning_rate": 9.481373311328927e-05, |
| "logits/chosen": 0.3577250838279724, |
| "logits/rejected": 1.0607563257217407, |
| "logps/chosen": -192.20773315429688, |
| "logps/rejected": -223.62220764160156, |
| "loss": 0.0009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.199789047241211, |
| "rewards/margins": 12.089982032775879, |
| "rewards/rejected": -7.890193939208984, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.3227557411273487, |
| "grad_norm": 0.011571898125112057, |
| "learning_rate": 9.426084159039497e-05, |
| "logits/chosen": 0.6079049110412598, |
| "logits/rejected": 0.6202818155288696, |
| "logps/chosen": -129.2443389892578, |
| "logps/rejected": -254.39288330078125, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.321871042251587, |
| "rewards/margins": 12.733055114746094, |
| "rewards/rejected": -9.41118335723877, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.329436325678497, |
| "grad_norm": 0.003518011188134551, |
| "learning_rate": 9.370724565335733e-05, |
| "logits/chosen": 0.5599907636642456, |
| "logits/rejected": 0.6773720383644104, |
| "logps/chosen": -142.21826171875, |
| "logps/rejected": -256.0460510253906, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.407008171081543, |
| "rewards/margins": 12.075630187988281, |
| "rewards/rejected": -8.668621063232422, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.336116910229645, |
| "grad_norm": 0.00033826244180090725, |
| "learning_rate": 9.315297264704276e-05, |
| "logits/chosen": 0.36998850107192993, |
| "logits/rejected": 1.1482012271881104, |
| "logps/chosen": -161.43829345703125, |
| "logps/rejected": -188.72938537597656, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6240572929382324, |
| "rewards/margins": 12.082491874694824, |
| "rewards/rejected": -8.45843505859375, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.3427974947807932, |
| "grad_norm": 0.013436054810881615, |
| "learning_rate": 9.259804994976145e-05, |
| "logits/chosen": 0.4420285224914551, |
| "logits/rejected": 1.2852901220321655, |
| "logps/chosen": -216.61392211914062, |
| "logps/rejected": -218.47210693359375, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.93255877494812, |
| "rewards/margins": 12.457169532775879, |
| "rewards/rejected": -8.524611473083496, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.3494780793319414, |
| "grad_norm": 0.0006867127376608551, |
| "learning_rate": 9.204250497191507e-05, |
| "logits/chosen": 0.3688305616378784, |
| "logits/rejected": 1.0229319334030151, |
| "logps/chosen": -187.53517150878906, |
| "logps/rejected": -212.68167114257812, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5623180866241455, |
| "rewards/margins": 12.298954010009766, |
| "rewards/rejected": -8.736637115478516, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.3561586638830898, |
| "grad_norm": 0.006973725743591785, |
| "learning_rate": 9.148636515464286e-05, |
| "logits/chosen": 0.6111980676651001, |
| "logits/rejected": 1.1670434474945068, |
| "logps/chosen": -150.9393768310547, |
| "logps/rejected": -190.71719360351562, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.552490234375, |
| "rewards/margins": 11.955738067626953, |
| "rewards/rejected": -8.40324878692627, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.362839248434238, |
| "grad_norm": 0.0029133392963558435, |
| "learning_rate": 9.092965796846615e-05, |
| "logits/chosen": 0.43748268485069275, |
| "logits/rejected": 0.8366719484329224, |
| "logps/chosen": -141.9459686279297, |
| "logps/rejected": -248.22157287597656, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4116930961608887, |
| "rewards/margins": 13.216766357421875, |
| "rewards/rejected": -9.805072784423828, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.3695198329853862, |
| "grad_norm": 0.0006192025612108409, |
| "learning_rate": 9.037241091193146e-05, |
| "logits/chosen": 0.7180970907211304, |
| "logits/rejected": 1.0650739669799805, |
| "logps/chosen": -103.74508666992188, |
| "logps/rejected": -204.15809631347656, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.131009101867676, |
| "rewards/margins": 11.749822616577148, |
| "rewards/rejected": -8.618814468383789, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.3762004175365345, |
| "grad_norm": 0.0005728130927309394, |
| "learning_rate": 8.981465151025214e-05, |
| "logits/chosen": 0.41642457246780396, |
| "logits/rejected": 0.8988510370254517, |
| "logps/chosen": -156.42083740234375, |
| "logps/rejected": -248.08001708984375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5374138355255127, |
| "rewards/margins": 12.660049438476562, |
| "rewards/rejected": -9.122636795043945, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.3828810020876827, |
| "grad_norm": 0.03418273478746414, |
| "learning_rate": 8.925640731394891e-05, |
| "logits/chosen": 0.40667593479156494, |
| "logits/rejected": 1.2365777492523193, |
| "logps/chosen": -173.7451934814453, |
| "logps/rejected": -170.29013061523438, |
| "loss": 0.0023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2876858711242676, |
| "rewards/margins": 12.047090530395508, |
| "rewards/rejected": -8.759405136108398, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.389561586638831, |
| "grad_norm": 0.00034636727650649846, |
| "learning_rate": 8.869770589748885e-05, |
| "logits/chosen": 0.6443273425102234, |
| "logits/rejected": 1.0803853273391724, |
| "logps/chosen": -149.794189453125, |
| "logps/rejected": -238.55108642578125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2578701972961426, |
| "rewards/margins": 11.839977264404297, |
| "rewards/rejected": -8.58210563659668, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.396242171189979, |
| "grad_norm": 0.0011885147541761398, |
| "learning_rate": 8.813857485792346e-05, |
| "logits/chosen": 0.8002556562423706, |
| "logits/rejected": 0.8125602602958679, |
| "logps/chosen": -113.92469787597656, |
| "logps/rejected": -232.79171752929688, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2659642696380615, |
| "rewards/margins": 12.342820167541504, |
| "rewards/rejected": -9.07685661315918, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.4029227557411272, |
| "grad_norm": 0.003782590152695775, |
| "learning_rate": 8.757904181352548e-05, |
| "logits/chosen": 0.5000881552696228, |
| "logits/rejected": 0.8626303672790527, |
| "logps/chosen": -140.60691833496094, |
| "logps/rejected": -224.2894287109375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.359609365463257, |
| "rewards/margins": 12.288975715637207, |
| "rewards/rejected": -8.929367065429688, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.4096033402922756, |
| "grad_norm": 0.0002241008187411353, |
| "learning_rate": 8.701913440242459e-05, |
| "logits/chosen": 0.7766547799110413, |
| "logits/rejected": 0.6242997050285339, |
| "logps/chosen": -118.01210021972656, |
| "logps/rejected": -280.14080810546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.590362310409546, |
| "rewards/margins": 12.91071605682373, |
| "rewards/rejected": -9.320354461669922, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.4162839248434238, |
| "grad_norm": 0.0009039054857566953, |
| "learning_rate": 8.645888028124245e-05, |
| "logits/chosen": 0.45011216402053833, |
| "logits/rejected": 1.036590814590454, |
| "logps/chosen": -150.9805145263672, |
| "logps/rejected": -212.8893585205078, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.292708396911621, |
| "rewards/margins": 12.373250961303711, |
| "rewards/rejected": -9.080541610717773, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.422964509394572, |
| "grad_norm": 0.000518288929015398, |
| "learning_rate": 8.589830712372634e-05, |
| "logits/chosen": 0.4165090322494507, |
| "logits/rejected": 1.087862491607666, |
| "logps/chosen": -157.97787475585938, |
| "logps/rejected": -206.57342529296875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5738751888275146, |
| "rewards/margins": 12.204456329345703, |
| "rewards/rejected": -8.630579948425293, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.4296450939457204, |
| "grad_norm": 0.0008133598603308201, |
| "learning_rate": 8.533744261938238e-05, |
| "logits/chosen": 0.2943029999732971, |
| "logits/rejected": 1.3612968921661377, |
| "logps/chosen": -220.78323364257812, |
| "logps/rejected": -184.81143188476562, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.250751495361328, |
| "rewards/margins": 12.059762001037598, |
| "rewards/rejected": -8.80901050567627, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.4363256784968685, |
| "grad_norm": 0.0004915079916827381, |
| "learning_rate": 8.477631447210778e-05, |
| "logits/chosen": 0.24841757118701935, |
| "logits/rejected": 1.2294715642929077, |
| "logps/chosen": -194.55015563964844, |
| "logps/rejected": -207.66958618164062, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5389163494110107, |
| "rewards/margins": 12.835709571838379, |
| "rewards/rejected": -9.296793937683105, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.4430062630480167, |
| "grad_norm": 0.004459040705114603, |
| "learning_rate": 8.421495039882238e-05, |
| "logits/chosen": 0.6321086287498474, |
| "logits/rejected": 0.9829391837120056, |
| "logps/chosen": -146.0184326171875, |
| "logps/rejected": -238.7043914794922, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0472970008850098, |
| "rewards/margins": 13.108593940734863, |
| "rewards/rejected": -10.061296463012695, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.4496868475991649, |
| "grad_norm": 0.002996882889419794, |
| "learning_rate": 8.365337812809957e-05, |
| "logits/chosen": 0.5133633613586426, |
| "logits/rejected": 1.3615784645080566, |
| "logps/chosen": -159.088134765625, |
| "logps/rejected": -166.50949096679688, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.693748712539673, |
| "rewards/margins": 11.46066951751709, |
| "rewards/rejected": -7.766920566558838, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.456367432150313, |
| "grad_norm": 0.007244854234158993, |
| "learning_rate": 8.309162539879668e-05, |
| "logits/chosen": 0.5689404010772705, |
| "logits/rejected": 0.7958050966262817, |
| "logps/chosen": -136.24407958984375, |
| "logps/rejected": -253.57131958007812, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1574044227600098, |
| "rewards/margins": 12.73931884765625, |
| "rewards/rejected": -9.581913948059082, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.4630480167014615, |
| "grad_norm": 0.0009683882817625999, |
| "learning_rate": 8.252971995868472e-05, |
| "logits/chosen": 0.4068656861782074, |
| "logits/rejected": 1.104844093322754, |
| "logps/chosen": -153.0896453857422, |
| "logps/rejected": -200.84996032714844, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2435359954833984, |
| "rewards/margins": 11.93062973022461, |
| "rewards/rejected": -8.687093734741211, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.4697286012526096, |
| "grad_norm": 0.00201588892377913, |
| "learning_rate": 8.196768956307795e-05, |
| "logits/chosen": 0.5268558263778687, |
| "logits/rejected": 0.9168641567230225, |
| "logps/chosen": -178.9090118408203, |
| "logps/rejected": -254.7775115966797, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.295229911804199, |
| "rewards/margins": 13.071582794189453, |
| "rewards/rejected": -9.77635383605957, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.4764091858037578, |
| "grad_norm": 0.00043293953058309853, |
| "learning_rate": 8.140556197346273e-05, |
| "logits/chosen": 0.45406419038772583, |
| "logits/rejected": 1.1222517490386963, |
| "logps/chosen": -201.90655517578125, |
| "logps/rejected": -249.59384155273438, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7309956550598145, |
| "rewards/margins": 12.679603576660156, |
| "rewards/rejected": -8.948606491088867, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.4830897703549062, |
| "grad_norm": 0.0047185542061924934, |
| "learning_rate": 8.084336495612638e-05, |
| "logits/chosen": 0.5548102259635925, |
| "logits/rejected": 1.0701701641082764, |
| "logps/chosen": -137.0140838623047, |
| "logps/rejected": -196.58352661132812, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2548980712890625, |
| "rewards/margins": 12.659390449523926, |
| "rewards/rejected": -9.404491424560547, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.4897703549060544, |
| "grad_norm": 0.0012326568830758333, |
| "learning_rate": 8.02811262807855e-05, |
| "logits/chosen": 0.7784193158149719, |
| "logits/rejected": 1.0100144147872925, |
| "logps/chosen": -135.61509704589844, |
| "logps/rejected": -227.09725952148438, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.833930730819702, |
| "rewards/margins": 11.9942045211792, |
| "rewards/rejected": -9.160274505615234, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.4964509394572025, |
| "grad_norm": 0.003940995782613754, |
| "learning_rate": 7.971887371921452e-05, |
| "logits/chosen": 0.45579829812049866, |
| "logits/rejected": 1.0411605834960938, |
| "logps/chosen": -151.62644958496094, |
| "logps/rejected": -205.2895965576172, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.106025218963623, |
| "rewards/margins": 12.4378023147583, |
| "rewards/rejected": -9.331775665283203, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.5031315240083507, |
| "grad_norm": 0.0006113062263466418, |
| "learning_rate": 7.915663504387365e-05, |
| "logits/chosen": 0.7197325229644775, |
| "logits/rejected": 1.078650712966919, |
| "logps/chosen": -145.16757202148438, |
| "logps/rejected": -241.98052978515625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.173128128051758, |
| "rewards/margins": 13.138956069946289, |
| "rewards/rejected": -9.965826988220215, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.5098121085594989, |
| "grad_norm": 0.0055145323276519775, |
| "learning_rate": 7.859443802653728e-05, |
| "logits/chosen": 0.3827665150165558, |
| "logits/rejected": 1.2257570028305054, |
| "logps/chosen": -188.14073181152344, |
| "logps/rejected": -200.613037109375, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5312018394470215, |
| "rewards/margins": 12.372119903564453, |
| "rewards/rejected": -8.840917587280273, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.516492693110647, |
| "grad_norm": 0.001715692924335599, |
| "learning_rate": 7.803231043692206e-05, |
| "logits/chosen": 0.560634195804596, |
| "logits/rejected": 0.9691610932350159, |
| "logps/chosen": -154.68666076660156, |
| "logps/rejected": -228.98910522460938, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.682804584503174, |
| "rewards/margins": 14.054306030273438, |
| "rewards/rejected": -10.371501922607422, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.5231732776617954, |
| "grad_norm": 0.00392462033778429, |
| "learning_rate": 7.74702800413153e-05, |
| "logits/chosen": 0.47522974014282227, |
| "logits/rejected": 1.1297662258148193, |
| "logps/chosen": -155.83364868164062, |
| "logps/rejected": -188.842041015625, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.877500534057617, |
| "rewards/margins": 12.252093315124512, |
| "rewards/rejected": -8.374591827392578, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.5298538622129436, |
| "grad_norm": 0.000706602178979665, |
| "learning_rate": 7.690837460120337e-05, |
| "logits/chosen": 0.3643675446510315, |
| "logits/rejected": 0.6753086447715759, |
| "logps/chosen": -132.07772827148438, |
| "logps/rejected": -268.08905029296875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.478888988494873, |
| "rewards/margins": 13.154215812683105, |
| "rewards/rejected": -9.675326347351074, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.536534446764092, |
| "grad_norm": 0.003365734126418829, |
| "learning_rate": 7.634662187190045e-05, |
| "logits/chosen": 0.648161768913269, |
| "logits/rejected": 1.1449542045593262, |
| "logps/chosen": -159.03543090820312, |
| "logps/rejected": -207.7163848876953, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.997041702270508, |
| "rewards/margins": 12.427491188049316, |
| "rewards/rejected": -9.430448532104492, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.5432150313152402, |
| "grad_norm": 0.0004856723826378584, |
| "learning_rate": 7.578504960117764e-05, |
| "logits/chosen": 0.792070209980011, |
| "logits/rejected": 1.158513069152832, |
| "logps/chosen": -141.17575073242188, |
| "logps/rejected": -243.63555908203125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.499164581298828, |
| "rewards/margins": 12.730960845947266, |
| "rewards/rejected": -9.231797218322754, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.5498956158663884, |
| "grad_norm": 0.0010725751053541899, |
| "learning_rate": 7.522368552789226e-05, |
| "logits/chosen": 0.6130843162536621, |
| "logits/rejected": 0.9492489099502563, |
| "logps/chosen": -115.36308288574219, |
| "logps/rejected": -209.8251953125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.06745982170105, |
| "rewards/margins": 11.86876106262207, |
| "rewards/rejected": -8.801301002502441, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.5565762004175365, |
| "grad_norm": 0.004260468762367964, |
| "learning_rate": 7.466255738061765e-05, |
| "logits/chosen": 0.4129951298236847, |
| "logits/rejected": 1.1070955991744995, |
| "logps/chosen": -170.56251525878906, |
| "logps/rejected": -215.829345703125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.782398223876953, |
| "rewards/margins": 12.914801597595215, |
| "rewards/rejected": -9.132403373718262, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.5632567849686847, |
| "grad_norm": 0.00023358217731583863, |
| "learning_rate": 7.410169287627369e-05, |
| "logits/chosen": 0.7674708962440491, |
| "logits/rejected": 0.951148509979248, |
| "logps/chosen": -153.4148406982422, |
| "logps/rejected": -276.1761474609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.604790210723877, |
| "rewards/margins": 13.036596298217773, |
| "rewards/rejected": -9.431807518005371, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.5699373695198329, |
| "grad_norm": 0.0016559605719521642, |
| "learning_rate": 7.354111971875756e-05, |
| "logits/chosen": 0.47153300046920776, |
| "logits/rejected": 1.1286731958389282, |
| "logps/chosen": -154.3711700439453, |
| "logps/rejected": -184.73593139648438, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.253800868988037, |
| "rewards/margins": 11.487798690795898, |
| "rewards/rejected": -8.233997344970703, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.5766179540709813, |
| "grad_norm": 0.0021103417966514826, |
| "learning_rate": 7.298086559757542e-05, |
| "logits/chosen": 0.5945063233375549, |
| "logits/rejected": 1.0322848558425903, |
| "logps/chosen": -128.76268005371094, |
| "logps/rejected": -196.54275512695312, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2880306243896484, |
| "rewards/margins": 12.563961029052734, |
| "rewards/rejected": -9.27592945098877, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.5832985386221294, |
| "grad_norm": 0.001216527889482677, |
| "learning_rate": 7.242095818647454e-05, |
| "logits/chosen": 0.31693902611732483, |
| "logits/rejected": 1.3508694171905518, |
| "logps/chosen": -172.87911987304688, |
| "logps/rejected": -180.53089904785156, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3253848552703857, |
| "rewards/margins": 11.522850036621094, |
| "rewards/rejected": -8.197465896606445, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.5899791231732776, |
| "grad_norm": 0.0009002351434901357, |
| "learning_rate": 7.186142514207653e-05, |
| "logits/chosen": 0.4034768342971802, |
| "logits/rejected": 1.1458234786987305, |
| "logps/chosen": -154.97938537597656, |
| "logps/rejected": -189.22506713867188, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2304248809814453, |
| "rewards/margins": 11.878856658935547, |
| "rewards/rejected": -8.648432731628418, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.596659707724426, |
| "grad_norm": 0.00505381915718317, |
| "learning_rate": 7.130229410251116e-05, |
| "logits/chosen": 0.3664909899234772, |
| "logits/rejected": 0.9492464065551758, |
| "logps/chosen": -182.10992431640625, |
| "logps/rejected": -226.52783203125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0609524250030518, |
| "rewards/margins": 11.703436851501465, |
| "rewards/rejected": -8.642484664916992, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.6033402922755742, |
| "grad_norm": 0.0007440568879246712, |
| "learning_rate": 7.074359268605111e-05, |
| "logits/chosen": 0.717093825340271, |
| "logits/rejected": 0.741437554359436, |
| "logps/chosen": -150.6142120361328, |
| "logps/rejected": -280.0047912597656, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4058785438537598, |
| "rewards/margins": 13.219608306884766, |
| "rewards/rejected": -9.813730239868164, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.6100208768267223, |
| "grad_norm": 0.0016615077620372176, |
| "learning_rate": 7.018534848974788e-05, |
| "logits/chosen": 0.5540122985839844, |
| "logits/rejected": 1.253514289855957, |
| "logps/chosen": -193.7939453125, |
| "logps/rejected": -226.57171630859375, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.350304126739502, |
| "rewards/margins": 13.100728988647461, |
| "rewards/rejected": -9.750425338745117, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.6167014613778705, |
| "grad_norm": 0.0004508822748903185, |
| "learning_rate": 6.962758908806857e-05, |
| "logits/chosen": 0.47170671820640564, |
| "logits/rejected": 1.2200703620910645, |
| "logps/chosen": -208.1016845703125, |
| "logps/rejected": -205.09820556640625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2683615684509277, |
| "rewards/margins": 12.15200138092041, |
| "rewards/rejected": -8.883639335632324, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.6233820459290187, |
| "grad_norm": 0.0014309012331068516, |
| "learning_rate": 6.907034203153386e-05, |
| "logits/chosen": 0.28136053681373596, |
| "logits/rejected": 1.2956924438476562, |
| "logps/chosen": -197.4197235107422, |
| "logps/rejected": -165.1903533935547, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7073264122009277, |
| "rewards/margins": 12.170064926147461, |
| "rewards/rejected": -8.462738037109375, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.6300626304801669, |
| "grad_norm": 0.0004903227090835571, |
| "learning_rate": 6.851363484535715e-05, |
| "logits/chosen": 0.5373984575271606, |
| "logits/rejected": 1.427203893661499, |
| "logps/chosen": -200.0109405517578, |
| "logps/rejected": -210.7964630126953, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.652834892272949, |
| "rewards/margins": 12.517841339111328, |
| "rewards/rejected": -8.865007400512695, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.6367432150313153, |
| "grad_norm": 0.0005771399009972811, |
| "learning_rate": 6.795749502808498e-05, |
| "logits/chosen": 0.38623395562171936, |
| "logits/rejected": 1.2658888101577759, |
| "logps/chosen": -161.20318603515625, |
| "logps/rejected": -193.91897583007812, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.443969249725342, |
| "rewards/margins": 12.874672889709473, |
| "rewards/rejected": -9.430703163146973, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.6434237995824634, |
| "grad_norm": 0.0004550297453533858, |
| "learning_rate": 6.74019500502386e-05, |
| "logits/chosen": 0.31895774602890015, |
| "logits/rejected": 1.2252631187438965, |
| "logps/chosen": -179.82730102539062, |
| "logps/rejected": -198.14735412597656, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.135148525238037, |
| "rewards/margins": 11.938846588134766, |
| "rewards/rejected": -8.803698539733887, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.6501043841336118, |
| "grad_norm": 0.0006330306641757488, |
| "learning_rate": 6.684702735295725e-05, |
| "logits/chosen": 0.59703129529953, |
| "logits/rejected": 1.1867053508758545, |
| "logps/chosen": -145.04815673828125, |
| "logps/rejected": -198.34249877929688, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.332218647003174, |
| "rewards/margins": 12.267135620117188, |
| "rewards/rejected": -8.934917449951172, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.65678496868476, |
| "grad_norm": 0.0033348165452480316, |
| "learning_rate": 6.629275434664267e-05, |
| "logits/chosen": 0.3704008460044861, |
| "logits/rejected": 1.013413429260254, |
| "logps/chosen": -181.12796020507812, |
| "logps/rejected": -244.9725341796875, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5755138397216797, |
| "rewards/margins": 12.979727745056152, |
| "rewards/rejected": -9.404214859008789, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.6634655532359082, |
| "grad_norm": 0.0004896700265817344, |
| "learning_rate": 6.573915840960506e-05, |
| "logits/chosen": 0.3934718668460846, |
| "logits/rejected": 1.2252607345581055, |
| "logps/chosen": -178.19854736328125, |
| "logps/rejected": -183.23167419433594, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2613682746887207, |
| "rewards/margins": 12.15888500213623, |
| "rewards/rejected": -8.897516250610352, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.6701461377870563, |
| "grad_norm": 0.0017181966686621308, |
| "learning_rate": 6.518626688671075e-05, |
| "logits/chosen": 0.6154460906982422, |
| "logits/rejected": 0.8937150239944458, |
| "logps/chosen": -150.39620971679688, |
| "logps/rejected": -242.8111114501953, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4164161682128906, |
| "rewards/margins": 13.423528671264648, |
| "rewards/rejected": -10.007112503051758, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.6768267223382045, |
| "grad_norm": 0.00037543641519732773, |
| "learning_rate": 6.463410708803162e-05, |
| "logits/chosen": 0.6243424415588379, |
| "logits/rejected": 1.0089101791381836, |
| "logps/chosen": -118.72608947753906, |
| "logps/rejected": -188.4051971435547, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.186236619949341, |
| "rewards/margins": 11.791353225708008, |
| "rewards/rejected": -8.605117797851562, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.6835073068893527, |
| "grad_norm": 0.012711738236248493, |
| "learning_rate": 6.408270628749605e-05, |
| "logits/chosen": 0.49875199794769287, |
| "logits/rejected": 1.288053274154663, |
| "logps/chosen": -181.6597442626953, |
| "logps/rejected": -191.96499633789062, |
| "loss": 0.0016, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1369545459747314, |
| "rewards/margins": 11.313640594482422, |
| "rewards/rejected": -8.17668628692627, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.690187891440501, |
| "grad_norm": 0.0029580378904938698, |
| "learning_rate": 6.353209172154159e-05, |
| "logits/chosen": 0.5064088106155396, |
| "logits/rejected": 0.9738377928733826, |
| "logps/chosen": -151.7202606201172, |
| "logps/rejected": -202.4073028564453, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9301891326904297, |
| "rewards/margins": 11.945601463317871, |
| "rewards/rejected": -9.015412330627441, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.6968684759916492, |
| "grad_norm": 0.001020129886455834, |
| "learning_rate": 6.298229058776986e-05, |
| "logits/chosen": 0.700631320476532, |
| "logits/rejected": 0.908123254776001, |
| "logps/chosen": -131.9924774169922, |
| "logps/rejected": -255.37557983398438, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.146085262298584, |
| "rewards/margins": 12.487560272216797, |
| "rewards/rejected": -9.341474533081055, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.7035490605427976, |
| "grad_norm": 0.0003875437832903117, |
| "learning_rate": 6.243333004360298e-05, |
| "logits/chosen": 0.5261640548706055, |
| "logits/rejected": 1.0396316051483154, |
| "logps/chosen": -147.33441162109375, |
| "logps/rejected": -225.6119384765625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4174602031707764, |
| "rewards/margins": 13.032320022583008, |
| "rewards/rejected": -9.614859580993652, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.7102296450939458, |
| "grad_norm": 0.0135966707020998, |
| "learning_rate": 6.188523720494211e-05, |
| "logits/chosen": 0.20291149616241455, |
| "logits/rejected": 1.127085566520691, |
| "logps/chosen": -208.96347045898438, |
| "logps/rejected": -220.78175354003906, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3484673500061035, |
| "rewards/margins": 11.46480655670166, |
| "rewards/rejected": -8.116339683532715, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.716910229645094, |
| "grad_norm": 0.0002224293421022594, |
| "learning_rate": 6.133803914482815e-05, |
| "logits/chosen": 0.7359347939491272, |
| "logits/rejected": 0.9675177931785583, |
| "logps/chosen": -172.76893615722656, |
| "logps/rejected": -252.0592498779297, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5859246253967285, |
| "rewards/margins": 12.79989242553711, |
| "rewards/rejected": -9.213968276977539, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.7235908141962422, |
| "grad_norm": 0.002110100816935301, |
| "learning_rate": 6.0791762892104416e-05, |
| "logits/chosen": 0.6298213601112366, |
| "logits/rejected": 1.1915262937545776, |
| "logps/chosen": -159.96487426757812, |
| "logps/rejected": -225.63790893554688, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.81847882270813, |
| "rewards/margins": 12.683730125427246, |
| "rewards/rejected": -8.865250587463379, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.7302713987473903, |
| "grad_norm": 0.001850843895226717, |
| "learning_rate": 6.024643543008152e-05, |
| "logits/chosen": 0.6120001077651978, |
| "logits/rejected": 0.9265426397323608, |
| "logps/chosen": -136.79991149902344, |
| "logps/rejected": -205.8091583251953, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2155539989471436, |
| "rewards/margins": 12.165740966796875, |
| "rewards/rejected": -8.950185775756836, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.7369519832985385, |
| "grad_norm": 0.009307686239480972, |
| "learning_rate": 5.970208369520461e-05, |
| "logits/chosen": 0.41133901476860046, |
| "logits/rejected": 1.244918942451477, |
| "logps/chosen": -170.95782470703125, |
| "logps/rejected": -175.3126678466797, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.113586187362671, |
| "rewards/margins": 11.052433013916016, |
| "rewards/rejected": -7.938846588134766, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.743632567849687, |
| "grad_norm": 0.0012803918216377497, |
| "learning_rate": 5.915873457572276e-05, |
| "logits/chosen": 0.633670449256897, |
| "logits/rejected": 1.3272953033447266, |
| "logps/chosen": -195.4368896484375, |
| "logps/rejected": -219.29466247558594, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7701711654663086, |
| "rewards/margins": 12.498077392578125, |
| "rewards/rejected": -8.7279052734375, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.750313152400835, |
| "grad_norm": 0.002625958528369665, |
| "learning_rate": 5.861641491036095e-05, |
| "logits/chosen": 0.4034227430820465, |
| "logits/rejected": 1.1840628385543823, |
| "logps/chosen": -175.64141845703125, |
| "logps/rejected": -210.2655792236328, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4675025939941406, |
| "rewards/margins": 12.683855056762695, |
| "rewards/rejected": -9.216352462768555, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.7569937369519835, |
| "grad_norm": 0.0024748044088482857, |
| "learning_rate": 5.807515148699412e-05, |
| "logits/chosen": 0.6530991196632385, |
| "logits/rejected": 1.0524541139602661, |
| "logps/chosen": -141.74876403808594, |
| "logps/rejected": -196.0273895263672, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1580276489257812, |
| "rewards/margins": 12.469364166259766, |
| "rewards/rejected": -9.311336517333984, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.7636743215031316, |
| "grad_norm": 0.009714308194816113, |
| "learning_rate": 5.75349710413243e-05, |
| "logits/chosen": 0.5566273927688599, |
| "logits/rejected": 0.9325087666511536, |
| "logps/chosen": -132.86610412597656, |
| "logps/rejected": -217.09593200683594, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0257492065429688, |
| "rewards/margins": 11.39592170715332, |
| "rewards/rejected": -8.370172500610352, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.7703549060542798, |
| "grad_norm": 0.00043294430361129344, |
| "learning_rate": 5.699590025555984e-05, |
| "logits/chosen": 0.5268535017967224, |
| "logits/rejected": 1.4419851303100586, |
| "logps/chosen": -161.57455444335938, |
| "logps/rejected": -173.24090576171875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.010202884674072, |
| "rewards/margins": 12.556886672973633, |
| "rewards/rejected": -8.546682357788086, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.777035490605428, |
| "grad_norm": 0.0034687798470258713, |
| "learning_rate": 5.645796575709731e-05, |
| "logits/chosen": 0.5220036506652832, |
| "logits/rejected": 1.2265688180923462, |
| "logps/chosen": -148.57662963867188, |
| "logps/rejected": -177.5288543701172, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.401430130004883, |
| "rewards/margins": 12.56801700592041, |
| "rewards/rejected": -9.166585922241211, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.7837160751565762, |
| "grad_norm": 0.0005332541186362505, |
| "learning_rate": 5.592119411720651e-05, |
| "logits/chosen": 0.631429135799408, |
| "logits/rejected": 1.1897830963134766, |
| "logps/chosen": -164.9969482421875, |
| "logps/rejected": -215.81735229492188, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4762606620788574, |
| "rewards/margins": 12.587698936462402, |
| "rewards/rejected": -9.111437797546387, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.7903966597077243, |
| "grad_norm": 0.0007586259744130075, |
| "learning_rate": 5.5385611849717856e-05, |
| "logits/chosen": 0.7947689294815063, |
| "logits/rejected": 1.1861993074417114, |
| "logps/chosen": -150.67178344726562, |
| "logps/rejected": -223.85858154296875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.8535823822021484, |
| "rewards/margins": 13.847382545471191, |
| "rewards/rejected": -9.993799209594727, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.7970772442588725, |
| "grad_norm": 0.002334814053028822, |
| "learning_rate": 5.4851245409712585e-05, |
| "logits/chosen": 0.7192946672439575, |
| "logits/rejected": 1.0370632410049438, |
| "logps/chosen": -134.57066345214844, |
| "logps/rejected": -200.06007385253906, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.05899715423584, |
| "rewards/margins": 12.443685531616211, |
| "rewards/rejected": -9.384687423706055, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.803757828810021, |
| "grad_norm": 0.01562592014670372, |
| "learning_rate": 5.4318121192216314e-05, |
| "logits/chosen": 0.5374943017959595, |
| "logits/rejected": 1.2269227504730225, |
| "logps/chosen": -183.84536743164062, |
| "logps/rejected": -224.18148803710938, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.26055645942688, |
| "rewards/margins": 12.747486114501953, |
| "rewards/rejected": -9.486929893493652, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.810438413361169, |
| "grad_norm": 0.00042984873289242387, |
| "learning_rate": 5.378626553089499e-05, |
| "logits/chosen": 0.4885158836841583, |
| "logits/rejected": 1.0233110189437866, |
| "logps/chosen": -206.98069763183594, |
| "logps/rejected": -265.67486572265625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3298757076263428, |
| "rewards/margins": 13.249074935913086, |
| "rewards/rejected": -9.91919994354248, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.8171189979123175, |
| "grad_norm": 0.004903177265077829, |
| "learning_rate": 5.3255704696754276e-05, |
| "logits/chosen": 0.3477621078491211, |
| "logits/rejected": 1.3452774286270142, |
| "logps/chosen": -222.60406494140625, |
| "logps/rejected": -210.79745483398438, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.442991256713867, |
| "rewards/margins": 12.528120994567871, |
| "rewards/rejected": -9.08513069152832, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.8237995824634656, |
| "grad_norm": 0.0014413794269785285, |
| "learning_rate": 5.272646489684186e-05, |
| "logits/chosen": 0.5790391564369202, |
| "logits/rejected": 0.7671089172363281, |
| "logps/chosen": -113.13287353515625, |
| "logps/rejected": -217.6890106201172, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.147888422012329, |
| "rewards/margins": 13.171778678894043, |
| "rewards/rejected": -10.023889541625977, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.8304801670146138, |
| "grad_norm": 0.00028897402808070183, |
| "learning_rate": 5.219857227295291e-05, |
| "logits/chosen": 0.709730327129364, |
| "logits/rejected": 0.8601136207580566, |
| "logps/chosen": -156.91653442382812, |
| "logps/rejected": -295.93157958984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7537806034088135, |
| "rewards/margins": 13.796693801879883, |
| "rewards/rejected": -10.042911529541016, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.837160751565762, |
| "grad_norm": 0.0007451316341757774, |
| "learning_rate": 5.167205290033892e-05, |
| "logits/chosen": 0.658225953578949, |
| "logits/rejected": 1.08082115650177, |
| "logps/chosen": -171.94277954101562, |
| "logps/rejected": -237.1277618408203, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.034437894821167, |
| "rewards/margins": 12.588874816894531, |
| "rewards/rejected": -9.554437637329102, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.8438413361169101, |
| "grad_norm": 0.02490099146962166, |
| "learning_rate": 5.114693278641957e-05, |
| "logits/chosen": 0.6082062721252441, |
| "logits/rejected": 0.9871134161949158, |
| "logps/chosen": -178.09619140625, |
| "logps/rejected": -247.61810302734375, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1589179039001465, |
| "rewards/margins": 12.523808479309082, |
| "rewards/rejected": -9.364890098571777, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.8505219206680583, |
| "grad_norm": 0.005023833829909563, |
| "learning_rate": 5.062323786949824e-05, |
| "logits/chosen": 0.6513435244560242, |
| "logits/rejected": 1.0219300985336304, |
| "logps/chosen": -171.38177490234375, |
| "logps/rejected": -259.73443603515625, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3252928256988525, |
| "rewards/margins": 12.778488159179688, |
| "rewards/rejected": -9.453194618225098, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.8572025052192067, |
| "grad_norm": 0.001171917305327952, |
| "learning_rate": 5.0100994017480704e-05, |
| "logits/chosen": 0.7154785990715027, |
| "logits/rejected": 1.23942232131958, |
| "logps/chosen": -158.58740234375, |
| "logps/rejected": -199.68759155273438, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3045573234558105, |
| "rewards/margins": 12.734079360961914, |
| "rewards/rejected": -9.429522514343262, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.8638830897703549, |
| "grad_norm": 0.0008877121144905686, |
| "learning_rate": 4.958022702659731e-05, |
| "logits/chosen": 0.3124326765537262, |
| "logits/rejected": 1.3723382949829102, |
| "logps/chosen": -222.2159423828125, |
| "logps/rejected": -199.4228057861328, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.163365364074707, |
| "rewards/margins": 13.488718032836914, |
| "rewards/rejected": -9.325352668762207, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.8705636743215033, |
| "grad_norm": 0.0010831408435478806, |
| "learning_rate": 4.9060962620129e-05, |
| "logits/chosen": 0.4300207495689392, |
| "logits/rejected": 1.1081445217132568, |
| "logps/chosen": -144.10047912597656, |
| "logps/rejected": -205.8716583251953, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.295637607574463, |
| "rewards/margins": 12.61944580078125, |
| "rewards/rejected": -9.323808670043945, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.8772442588726515, |
| "grad_norm": 0.00025624182308092713, |
| "learning_rate": 4.854322644713648e-05, |
| "logits/chosen": 0.6763560175895691, |
| "logits/rejected": 0.9413488507270813, |
| "logps/chosen": -142.95899963378906, |
| "logps/rejected": -257.82391357421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.138211727142334, |
| "rewards/margins": 13.099564552307129, |
| "rewards/rejected": -9.961352348327637, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.8839248434237996, |
| "grad_norm": 0.004967350512742996, |
| "learning_rate": 4.8027044081193434e-05, |
| "logits/chosen": 0.9007304906845093, |
| "logits/rejected": 1.1363219022750854, |
| "logps/chosen": -121.35027313232422, |
| "logps/rejected": -230.96900939941406, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0166122913360596, |
| "rewards/margins": 12.013561248779297, |
| "rewards/rejected": -8.996949195861816, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.8906054279749478, |
| "grad_norm": 0.0005215193377807736, |
| "learning_rate": 4.751244101912317e-05, |
| "logits/chosen": 0.4525222182273865, |
| "logits/rejected": 1.0277119874954224, |
| "logps/chosen": -166.93800354003906, |
| "logps/rejected": -224.22732543945312, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6909801959991455, |
| "rewards/margins": 12.965056419372559, |
| "rewards/rejected": -9.274076461791992, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.897286012526096, |
| "grad_norm": 0.0002354821190237999, |
| "learning_rate": 4.6999442679739404e-05, |
| "logits/chosen": 0.5379772782325745, |
| "logits/rejected": 1.0427289009094238, |
| "logps/chosen": -121.2558364868164, |
| "logps/rejected": -200.19229125976562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4199142456054688, |
| "rewards/margins": 12.075815200805664, |
| "rewards/rejected": -8.655900955200195, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.9039665970772441, |
| "grad_norm": 0.00031139524071477354, |
| "learning_rate": 4.648807440259054e-05, |
| "logits/chosen": 0.6613008975982666, |
| "logits/rejected": 1.0043450593948364, |
| "logps/chosen": -117.17173767089844, |
| "logps/rejected": -229.09197998046875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0674209594726562, |
| "rewards/margins": 12.071032524108887, |
| "rewards/rejected": -9.003610610961914, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.9106471816283925, |
| "grad_norm": 0.0003046044730581343, |
| "learning_rate": 4.5978361446708026e-05, |
| "logits/chosen": 0.5342459678649902, |
| "logits/rejected": 1.357889175415039, |
| "logps/chosen": -207.97657775878906, |
| "logps/rejected": -223.646240234375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.909088611602783, |
| "rewards/margins": 12.76576042175293, |
| "rewards/rejected": -8.856673240661621, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.9173277661795407, |
| "grad_norm": 0.003803228959441185, |
| "learning_rate": 4.547032898935883e-05, |
| "logits/chosen": 0.42278778553009033, |
| "logits/rejected": 0.9761509895324707, |
| "logps/chosen": -219.009765625, |
| "logps/rejected": -267.2040100097656, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5123653411865234, |
| "rewards/margins": 12.867091178894043, |
| "rewards/rejected": -9.354723930358887, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.924008350730689, |
| "grad_norm": 0.0037957087624818087, |
| "learning_rate": 4.496400212480165e-05, |
| "logits/chosen": 0.5482458472251892, |
| "logits/rejected": 1.2564729452133179, |
| "logps/chosen": -150.4049072265625, |
| "logps/rejected": -185.18603515625, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.50251841545105, |
| "rewards/margins": 12.41536808013916, |
| "rewards/rejected": -8.912851333618164, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.9306889352818373, |
| "grad_norm": 0.00025647124857641757, |
| "learning_rate": 4.445940586304742e-05, |
| "logits/chosen": 0.2872941493988037, |
| "logits/rejected": 1.2851579189300537, |
| "logps/chosen": -186.39442443847656, |
| "logps/rejected": -196.6807861328125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.469214916229248, |
| "rewards/margins": 12.677154541015625, |
| "rewards/rejected": -9.207941055297852, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.9373695198329854, |
| "grad_norm": 0.002922415267676115, |
| "learning_rate": 4.3956565128623996e-05, |
| "logits/chosen": 0.42484214901924133, |
| "logits/rejected": 0.8704441785812378, |
| "logps/chosen": -172.12071228027344, |
| "logps/rejected": -256.0958557128906, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3208203315734863, |
| "rewards/margins": 13.219375610351562, |
| "rewards/rejected": -9.898555755615234, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.9440501043841336, |
| "grad_norm": 0.00034838722785934806, |
| "learning_rate": 4.3455504759345056e-05, |
| "logits/chosen": 0.3946557641029358, |
| "logits/rejected": 1.1197338104248047, |
| "logps/chosen": -186.8988037109375, |
| "logps/rejected": -234.5850830078125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.767469882965088, |
| "rewards/margins": 13.301392555236816, |
| "rewards/rejected": -9.53392219543457, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.9507306889352818, |
| "grad_norm": 0.0006544087664224207, |
| "learning_rate": 4.295624950508295e-05, |
| "logits/chosen": 0.4687265455722809, |
| "logits/rejected": 1.0979089736938477, |
| "logps/chosen": -126.43232727050781, |
| "logps/rejected": -183.7377166748047, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1935901641845703, |
| "rewards/margins": 11.762184143066406, |
| "rewards/rejected": -8.56859302520752, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.95741127348643, |
| "grad_norm": 0.00044172187335789204, |
| "learning_rate": 4.24588240265466e-05, |
| "logits/chosen": 0.8599841594696045, |
| "logits/rejected": 0.7370025515556335, |
| "logps/chosen": -138.1736297607422, |
| "logps/rejected": -296.47723388671875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7924275398254395, |
| "rewards/margins": 14.83957290649414, |
| "rewards/rejected": -11.04714584350586, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.9640918580375781, |
| "grad_norm": 0.001833213260397315, |
| "learning_rate": 4.1963252894063056e-05, |
| "logits/chosen": 0.5297300815582275, |
| "logits/rejected": 0.8016726970672607, |
| "logps/chosen": -158.78738403320312, |
| "logps/rejected": -272.85113525390625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3024632930755615, |
| "rewards/margins": 13.22446060180664, |
| "rewards/rejected": -9.921996116638184, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.9707724425887265, |
| "grad_norm": 0.0010583212133497, |
| "learning_rate": 4.146956058636406e-05, |
| "logits/chosen": 0.3446853756904602, |
| "logits/rejected": 1.0419793128967285, |
| "logps/chosen": -150.81710815429688, |
| "logps/rejected": -210.25636291503906, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4412221908569336, |
| "rewards/margins": 12.799392700195312, |
| "rewards/rejected": -9.358169555664062, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.9774530271398747, |
| "grad_norm": 0.001035829191096127, |
| "learning_rate": 4.097777148937663e-05, |
| "logits/chosen": 0.5304365754127502, |
| "logits/rejected": 0.9279758334159851, |
| "logps/chosen": -155.19656372070312, |
| "logps/rejected": -245.78414916992188, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.409811496734619, |
| "rewards/margins": 12.613784790039062, |
| "rewards/rejected": -9.203972816467285, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.984133611691023, |
| "grad_norm": 0.0017729728715494275, |
| "learning_rate": 4.048790989501893e-05, |
| "logits/chosen": 0.5875007510185242, |
| "logits/rejected": 1.1323366165161133, |
| "logps/chosen": -140.05502319335938, |
| "logps/rejected": -218.2332000732422, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.322587251663208, |
| "rewards/margins": 13.057868003845215, |
| "rewards/rejected": -9.73528003692627, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.9908141962421713, |
| "grad_norm": 0.0009540588362142444, |
| "learning_rate": 4.0000000000000024e-05, |
| "logits/chosen": 0.5278233289718628, |
| "logits/rejected": 1.2491222620010376, |
| "logps/chosen": -195.366943359375, |
| "logps/rejected": -243.4252471923828, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.871669292449951, |
| "rewards/margins": 13.229048728942871, |
| "rewards/rejected": -9.357379913330078, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.9974947807933194, |
| "grad_norm": 0.000431572028901428, |
| "learning_rate": 3.951406590462479e-05, |
| "logits/chosen": 0.8149689435958862, |
| "logits/rejected": 0.9918652772903442, |
| "logps/chosen": -132.57635498046875, |
| "logps/rejected": -227.2015380859375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2055227756500244, |
| "rewards/margins": 12.865432739257812, |
| "rewards/rejected": -9.659910202026367, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.0041753653444676, |
| "grad_norm": 0.000286836177110672, |
| "learning_rate": 3.9030131611603605e-05, |
| "logits/chosen": 0.5436272621154785, |
| "logits/rejected": 1.0624536275863647, |
| "logps/chosen": -145.8037872314453, |
| "logps/rejected": -211.19740295410156, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.796835422515869, |
| "rewards/margins": 13.231882095336914, |
| "rewards/rejected": -9.435047149658203, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.010855949895616, |
| "grad_norm": 0.012853999622166157, |
| "learning_rate": 3.854822102486654e-05, |
| "logits/chosen": 0.36405637860298157, |
| "logits/rejected": 1.1111705303192139, |
| "logps/chosen": -160.4470672607422, |
| "logps/rejected": -197.8324737548828, |
| "loss": 0.0009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6854968070983887, |
| "rewards/margins": 12.909117698669434, |
| "rewards/rejected": -9.223621368408203, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.017536534446764, |
| "grad_norm": 0.0005720301996916533, |
| "learning_rate": 3.8068357948382715e-05, |
| "logits/chosen": 0.554153561592102, |
| "logits/rejected": 1.1136068105697632, |
| "logps/chosen": -136.82049560546875, |
| "logps/rejected": -201.56800842285156, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.509455442428589, |
| "rewards/margins": 13.126138687133789, |
| "rewards/rejected": -9.616683006286621, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.024217118997912, |
| "grad_norm": 0.0013834366109222174, |
| "learning_rate": 3.759056608498451e-05, |
| "logits/chosen": 0.23741646111011505, |
| "logits/rejected": 1.0043878555297852, |
| "logps/chosen": -145.64682006835938, |
| "logps/rejected": -184.83242797851562, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1809093952178955, |
| "rewards/margins": 12.4395751953125, |
| "rewards/rejected": -9.258666038513184, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.0308977035490607, |
| "grad_norm": 0.00023377075558528304, |
| "learning_rate": 3.7114869035196815e-05, |
| "logits/chosen": 0.5432067513465881, |
| "logits/rejected": 1.153996467590332, |
| "logps/chosen": -167.0084686279297, |
| "logps/rejected": -215.29714965820312, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3663644790649414, |
| "rewards/margins": 12.530076026916504, |
| "rewards/rejected": -9.163711547851562, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.037578288100209, |
| "grad_norm": 0.0050998348742723465, |
| "learning_rate": 3.6641290296071134e-05, |
| "logits/chosen": 0.6319041848182678, |
| "logits/rejected": 1.0745893716812134, |
| "logps/chosen": -189.58924865722656, |
| "logps/rejected": -271.85601806640625, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.620217800140381, |
| "rewards/margins": 13.474288940429688, |
| "rewards/rejected": -9.854070663452148, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.044258872651357, |
| "grad_norm": 0.0010785937774926424, |
| "learning_rate": 3.616985326002506e-05, |
| "logits/chosen": 0.603534460067749, |
| "logits/rejected": 0.9770067930221558, |
| "logps/chosen": -131.9829864501953, |
| "logps/rejected": -214.7379608154297, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.453455924987793, |
| "rewards/margins": 12.670695304870605, |
| "rewards/rejected": -9.217240333557129, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.0509394572025053, |
| "grad_norm": 0.00044056694605387747, |
| "learning_rate": 3.570058121368678e-05, |
| "logits/chosen": 0.7243631482124329, |
| "logits/rejected": 1.2042169570922852, |
| "logps/chosen": -167.11720275878906, |
| "logps/rejected": -231.396728515625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.567816972732544, |
| "rewards/margins": 12.468470573425293, |
| "rewards/rejected": -8.900653839111328, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.0576200417536534, |
| "grad_norm": 0.0021745203994214535, |
| "learning_rate": 3.5233497336744907e-05, |
| "logits/chosen": 0.3905419111251831, |
| "logits/rejected": 0.9584081172943115, |
| "logps/chosen": -159.60000610351562, |
| "logps/rejected": -249.20263671875, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5410196781158447, |
| "rewards/margins": 13.085066795349121, |
| "rewards/rejected": -9.544046401977539, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.0643006263048016, |
| "grad_norm": 0.003901979187503457, |
| "learning_rate": 3.476862470080329e-05, |
| "logits/chosen": 0.3295363187789917, |
| "logits/rejected": 1.1402533054351807, |
| "logps/chosen": -155.79783630371094, |
| "logps/rejected": -215.50086975097656, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6909849643707275, |
| "rewards/margins": 12.775930404663086, |
| "rewards/rejected": -9.084944725036621, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.0709812108559498, |
| "grad_norm": 0.0003340893890708685, |
| "learning_rate": 3.4305986268241716e-05, |
| "logits/chosen": 0.5111173391342163, |
| "logits/rejected": 1.2973905801773071, |
| "logps/chosen": -179.60287475585938, |
| "logps/rejected": -192.02505493164062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3571372032165527, |
| "rewards/margins": 11.964003562927246, |
| "rewards/rejected": -8.606865882873535, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.077661795407098, |
| "grad_norm": 0.001990803051739931, |
| "learning_rate": 3.3845604891081396e-05, |
| "logits/chosen": 0.4202510118484497, |
| "logits/rejected": 0.9384629130363464, |
| "logps/chosen": -138.1681365966797, |
| "logps/rejected": -208.4151153564453, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3490982055664062, |
| "rewards/margins": 12.660693168640137, |
| "rewards/rejected": -9.311594009399414, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.0843423799582466, |
| "grad_norm": 0.0005957921384833753, |
| "learning_rate": 3.338750330985638e-05, |
| "logits/chosen": 0.28172874450683594, |
| "logits/rejected": 1.208389163017273, |
| "logps/chosen": -174.9169158935547, |
| "logps/rejected": -188.84228515625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.366245746612549, |
| "rewards/margins": 12.091064453125, |
| "rewards/rejected": -8.724818229675293, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.0910229645093947, |
| "grad_norm": 0.00024528999347239733, |
| "learning_rate": 3.293170415249015e-05, |
| "logits/chosen": 0.39942628145217896, |
| "logits/rejected": 0.9915326833724976, |
| "logps/chosen": -144.50625610351562, |
| "logps/rejected": -200.2137451171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.865654706954956, |
| "rewards/margins": 12.166391372680664, |
| "rewards/rejected": -9.300737380981445, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.097703549060543, |
| "grad_norm": 0.000993763329461217, |
| "learning_rate": 3.247822993317809e-05, |
| "logits/chosen": 0.22010907530784607, |
| "logits/rejected": 1.199426531791687, |
| "logps/chosen": -169.88729858398438, |
| "logps/rejected": -175.47982788085938, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4611406326293945, |
| "rewards/margins": 12.266228675842285, |
| "rewards/rejected": -8.80508804321289, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.104384133611691, |
| "grad_norm": 0.0003548046515788883, |
| "learning_rate": 3.202710305127518e-05, |
| "logits/chosen": 0.47131308913230896, |
| "logits/rejected": 1.1059740781784058, |
| "logps/chosen": -186.29026794433594, |
| "logps/rejected": -250.04234313964844, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3379273414611816, |
| "rewards/margins": 13.453146934509277, |
| "rewards/rejected": -10.115219116210938, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.1110647181628392, |
| "grad_norm": 0.0008554239757359028, |
| "learning_rate": 3.157834579018972e-05, |
| "logits/chosen": 0.513791024684906, |
| "logits/rejected": 1.2864115238189697, |
| "logps/chosen": -196.79559326171875, |
| "logps/rejected": -204.86273193359375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6547086238861084, |
| "rewards/margins": 12.088726997375488, |
| "rewards/rejected": -8.434019088745117, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.1177453027139874, |
| "grad_norm": 0.003397479420527816, |
| "learning_rate": 3.113198031628267e-05, |
| "logits/chosen": 0.6464900374412537, |
| "logits/rejected": 1.0889447927474976, |
| "logps/chosen": -180.78738403320312, |
| "logps/rejected": -245.6759033203125, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6549572944641113, |
| "rewards/margins": 13.39869499206543, |
| "rewards/rejected": -9.74373722076416, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.1244258872651356, |
| "grad_norm": 0.0010641631670296192, |
| "learning_rate": 3.0688028677772615e-05, |
| "logits/chosen": 0.11827311664819717, |
| "logits/rejected": 1.2251101732254028, |
| "logps/chosen": -204.45823669433594, |
| "logps/rejected": -189.8067626953125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.575129985809326, |
| "rewards/margins": 12.471538543701172, |
| "rewards/rejected": -8.896408081054688, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.1311064718162838, |
| "grad_norm": 0.0023302403278648853, |
| "learning_rate": 3.0246512803646787e-05, |
| "logits/chosen": 0.5905887484550476, |
| "logits/rejected": 1.3115737438201904, |
| "logps/chosen": -160.69956970214844, |
| "logps/rejected": -200.94296264648438, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5085368156433105, |
| "rewards/margins": 13.034270286560059, |
| "rewards/rejected": -9.525733947753906, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.137787056367432, |
| "grad_norm": 0.002920424798503518, |
| "learning_rate": 2.980745450257782e-05, |
| "logits/chosen": 0.793439507484436, |
| "logits/rejected": 1.0042898654937744, |
| "logps/chosen": -121.60202026367188, |
| "logps/rejected": -215.73321533203125, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.618967294692993, |
| "rewards/margins": 13.545495986938477, |
| "rewards/rejected": -9.926527976989746, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.1444676409185806, |
| "grad_norm": 0.002082268940284848, |
| "learning_rate": 2.9370875461846675e-05, |
| "logits/chosen": 0.6137429475784302, |
| "logits/rejected": 1.13056218624115, |
| "logps/chosen": -131.8321533203125, |
| "logps/rejected": -184.77304077148438, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.843712329864502, |
| "rewards/margins": 11.69206428527832, |
| "rewards/rejected": -8.848353385925293, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.1511482254697287, |
| "grad_norm": 0.0028337843250483274, |
| "learning_rate": 2.8936797246271074e-05, |
| "logits/chosen": 0.6332962512969971, |
| "logits/rejected": 0.9748692512512207, |
| "logps/chosen": -129.75018310546875, |
| "logps/rejected": -253.5199432373047, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.8473052978515625, |
| "rewards/margins": 12.578422546386719, |
| "rewards/rejected": -9.731117248535156, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.157828810020877, |
| "grad_norm": 0.0006396812968887389, |
| "learning_rate": 2.8505241297140674e-05, |
| "logits/chosen": 0.5061097145080566, |
| "logits/rejected": 0.7550954818725586, |
| "logps/chosen": -132.68972778320312, |
| "logps/rejected": -245.25881958007812, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3926968574523926, |
| "rewards/margins": 13.062835693359375, |
| "rewards/rejected": -9.67013931274414, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.164509394572025, |
| "grad_norm": 0.010461756028234959, |
| "learning_rate": 2.807622893115773e-05, |
| "logits/chosen": 0.2978319525718689, |
| "logits/rejected": 1.3880324363708496, |
| "logps/chosen": -172.8788299560547, |
| "logps/rejected": -175.39361572265625, |
| "loss": 0.0009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.776562452316284, |
| "rewards/margins": 12.051597595214844, |
| "rewards/rejected": -8.27503490447998, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.1711899791231732, |
| "grad_norm": 0.0011066849110648036, |
| "learning_rate": 2.7649781339384224e-05, |
| "logits/chosen": 0.4691530466079712, |
| "logits/rejected": 1.1270420551300049, |
| "logps/chosen": -153.82896423339844, |
| "logps/rejected": -219.26405334472656, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.471506357192993, |
| "rewards/margins": 12.436012268066406, |
| "rewards/rejected": -8.964506149291992, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.1778705636743214, |
| "grad_norm": 0.0004933910095132887, |
| "learning_rate": 2.7225919586195133e-05, |
| "logits/chosen": 0.42578768730163574, |
| "logits/rejected": 0.9353082776069641, |
| "logps/chosen": -145.8098907470703, |
| "logps/rejected": -232.33059692382812, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.495699882507324, |
| "rewards/margins": 13.425835609436035, |
| "rewards/rejected": -9.930134773254395, |
| "step": 326 |
| }, |
| { |
| "epoch": 2.1845511482254696, |
| "grad_norm": 0.0005303247016854584, |
| "learning_rate": 2.6804664608238035e-05, |
| "logits/chosen": 0.38650327920913696, |
| "logits/rejected": 0.703351616859436, |
| "logps/chosen": -127.17084503173828, |
| "logps/rejected": -228.81396484375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2400314807891846, |
| "rewards/margins": 14.320934295654297, |
| "rewards/rejected": -11.080903053283691, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.1912317327766178, |
| "grad_norm": 0.0009748793672770262, |
| "learning_rate": 2.6386037213398786e-05, |
| "logits/chosen": 0.4487416744232178, |
| "logits/rejected": 1.1172525882720947, |
| "logps/chosen": -160.41439819335938, |
| "logps/rejected": -252.93212890625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4267497062683105, |
| "rewards/margins": 12.388287544250488, |
| "rewards/rejected": -8.961536407470703, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.1979123173277664, |
| "grad_norm": 0.0002127394254785031, |
| "learning_rate": 2.5970058079773816e-05, |
| "logits/chosen": 0.5297442674636841, |
| "logits/rejected": 1.090707778930664, |
| "logps/chosen": -146.61062622070312, |
| "logps/rejected": -222.10076904296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.185678720474243, |
| "rewards/margins": 12.19675064086914, |
| "rewards/rejected": -9.011072158813477, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.2045929018789145, |
| "grad_norm": 0.00024031591601669788, |
| "learning_rate": 2.5556747754648718e-05, |
| "logits/chosen": 0.3269326686859131, |
| "logits/rejected": 1.258778691291809, |
| "logps/chosen": -171.55960083007812, |
| "logps/rejected": -198.94358825683594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.729045867919922, |
| "rewards/margins": 12.932186126708984, |
| "rewards/rejected": -9.203140258789062, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.2112734864300627, |
| "grad_norm": 0.0011042467085644603, |
| "learning_rate": 2.5146126653483355e-05, |
| "logits/chosen": 0.5408863425254822, |
| "logits/rejected": 0.9578089118003845, |
| "logps/chosen": -144.37818908691406, |
| "logps/rejected": -230.30712890625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7335550785064697, |
| "rewards/margins": 13.26760196685791, |
| "rewards/rejected": -9.53404712677002, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.217954070981211, |
| "grad_norm": 0.000509345147293061, |
| "learning_rate": 2.4738215058903343e-05, |
| "logits/chosen": 0.23819413781166077, |
| "logits/rejected": 1.2504065036773682, |
| "logps/chosen": -200.8672637939453, |
| "logps/rejected": -195.00680541992188, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.456376075744629, |
| "rewards/margins": 11.623927116394043, |
| "rewards/rejected": -8.167550086975098, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.224634655532359, |
| "grad_norm": 0.0002333044249098748, |
| "learning_rate": 2.4333033119698267e-05, |
| "logits/chosen": 0.47582709789276123, |
| "logits/rejected": 0.9356101155281067, |
| "logps/chosen": -141.839599609375, |
| "logps/rejected": -238.50204467773438, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.40701961517334, |
| "rewards/margins": 13.678300857543945, |
| "rewards/rejected": -10.271280288696289, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.2313152400835072, |
| "grad_norm": 0.0004591036995407194, |
| "learning_rate": 2.393060084982639e-05, |
| "logits/chosen": 0.37793320417404175, |
| "logits/rejected": 1.3741904497146606, |
| "logps/chosen": -185.0458984375, |
| "logps/rejected": -205.19021606445312, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4873390197753906, |
| "rewards/margins": 13.392151832580566, |
| "rewards/rejected": -9.90481185913086, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.2379958246346554, |
| "grad_norm": 0.00019663709099404514, |
| "learning_rate": 2.3530938127426098e-05, |
| "logits/chosen": 0.4617392420768738, |
| "logits/rejected": 1.034355878829956, |
| "logps/chosen": -171.1890106201172, |
| "logps/rejected": -242.88619995117188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.821230411529541, |
| "rewards/margins": 14.595293045043945, |
| "rewards/rejected": -10.774063110351562, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.2446764091858036, |
| "grad_norm": 0.0010577983921393752, |
| "learning_rate": 2.3134064693834022e-05, |
| "logits/chosen": 0.5003147721290588, |
| "logits/rejected": 1.3415861129760742, |
| "logps/chosen": -184.5679473876953, |
| "logps/rejected": -215.14340209960938, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.04392671585083, |
| "rewards/margins": 12.149682998657227, |
| "rewards/rejected": -9.105756759643555, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.2513569937369518, |
| "grad_norm": 0.0007893574074842036, |
| "learning_rate": 2.274000015260988e-05, |
| "logits/chosen": 0.5415345430374146, |
| "logits/rejected": 0.9807635545730591, |
| "logps/chosen": -128.60086059570312, |
| "logps/rejected": -237.58474731445312, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.333793878555298, |
| "rewards/margins": 13.319954872131348, |
| "rewards/rejected": -9.986162185668945, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.2580375782881004, |
| "grad_norm": 0.003091056365519762, |
| "learning_rate": 2.234876396856817e-05, |
| "logits/chosen": 0.5783103704452515, |
| "logits/rejected": 1.0742615461349487, |
| "logps/chosen": -175.44082641601562, |
| "logps/rejected": -240.58963012695312, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.027278423309326, |
| "rewards/margins": 14.690441131591797, |
| "rewards/rejected": -10.663162231445312, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.2647181628392485, |
| "grad_norm": 0.0008333343430422246, |
| "learning_rate": 2.1960375466816685e-05, |
| "logits/chosen": 0.4626193344593048, |
| "logits/rejected": 1.3312710523605347, |
| "logps/chosen": -179.23594665527344, |
| "logps/rejected": -204.65113830566406, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.346703290939331, |
| "rewards/margins": 12.753974914550781, |
| "rewards/rejected": -9.407270431518555, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.2713987473903967, |
| "grad_norm": 0.00037302737473510206, |
| "learning_rate": 2.1574853831802062e-05, |
| "logits/chosen": 0.592528760433197, |
| "logits/rejected": 1.1724703311920166, |
| "logps/chosen": -141.2781524658203, |
| "logps/rejected": -205.59986877441406, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.087510824203491, |
| "rewards/margins": 11.936580657958984, |
| "rewards/rejected": -8.849069595336914, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.278079331941545, |
| "grad_norm": 0.00047880798229016364, |
| "learning_rate": 2.1192218106362004e-05, |
| "logits/chosen": 0.4639968276023865, |
| "logits/rejected": 1.1247094869613647, |
| "logps/chosen": -167.94326782226562, |
| "logps/rejected": -223.88681030273438, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.112114429473877, |
| "rewards/margins": 12.083959579467773, |
| "rewards/rejected": -8.971845626831055, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.284759916492693, |
| "grad_norm": 0.0006552104605361819, |
| "learning_rate": 2.0812487190784765e-05, |
| "logits/chosen": 0.4680987298488617, |
| "logits/rejected": 1.1858527660369873, |
| "logps/chosen": -148.18118286132812, |
| "logps/rejected": -187.73463439941406, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.776710033416748, |
| "rewards/margins": 12.323975563049316, |
| "rewards/rejected": -8.54726505279541, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.2914405010438412, |
| "grad_norm": 0.0006427404005080462, |
| "learning_rate": 2.0435679841875517e-05, |
| "logits/chosen": 0.5988658666610718, |
| "logits/rejected": 0.8582466244697571, |
| "logps/chosen": -140.76695251464844, |
| "logps/rejected": -239.03146362304688, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.353756904602051, |
| "rewards/margins": 13.393622398376465, |
| "rewards/rejected": -10.039865493774414, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.2981210855949894, |
| "grad_norm": 0.000361485785106197, |
| "learning_rate": 2.0061814672029964e-05, |
| "logits/chosen": 0.6378865838050842, |
| "logits/rejected": 1.3802528381347656, |
| "logps/chosen": -165.37449645996094, |
| "logps/rejected": -207.62039184570312, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.260084390640259, |
| "rewards/margins": 12.411645889282227, |
| "rewards/rejected": -9.151561737060547, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.304801670146138, |
| "grad_norm": 0.002754014451056719, |
| "learning_rate": 1.9690910148314746e-05, |
| "logits/chosen": 0.32854142785072327, |
| "logits/rejected": 1.3232519626617432, |
| "logps/chosen": -195.93017578125, |
| "logps/rejected": -183.62518310546875, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4481635093688965, |
| "rewards/margins": 12.422575950622559, |
| "rewards/rejected": -8.974411964416504, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.311482254697286, |
| "grad_norm": 0.0057357000187039375, |
| "learning_rate": 1.9322984591555593e-05, |
| "logits/chosen": 0.5361195802688599, |
| "logits/rejected": 1.024005651473999, |
| "logps/chosen": -166.576904296875, |
| "logps/rejected": -230.3949432373047, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.127915859222412, |
| "rewards/margins": 12.562141418457031, |
| "rewards/rejected": -9.434226036071777, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.3181628392484344, |
| "grad_norm": 0.0005040675168856978, |
| "learning_rate": 1.8958056175432064e-05, |
| "logits/chosen": 0.4197937250137329, |
| "logits/rejected": 0.8660867214202881, |
| "logps/chosen": -144.34555053710938, |
| "logps/rejected": -255.72872924804688, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1396241188049316, |
| "rewards/margins": 13.073751449584961, |
| "rewards/rejected": -9.934127807617188, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.3248434237995825, |
| "grad_norm": 0.000678808952216059, |
| "learning_rate": 1.8596142925580008e-05, |
| "logits/chosen": 0.6340179443359375, |
| "logits/rejected": 0.8392489552497864, |
| "logps/chosen": -140.9353485107422, |
| "logps/rejected": -238.82012939453125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.632033586502075, |
| "rewards/margins": 13.527902603149414, |
| "rewards/rejected": -9.895870208740234, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.3315240083507307, |
| "grad_norm": 0.003711126046255231, |
| "learning_rate": 1.823726271870122e-05, |
| "logits/chosen": 0.6699475049972534, |
| "logits/rejected": 1.1883944272994995, |
| "logps/chosen": -143.63088989257812, |
| "logps/rejected": -210.8107147216797, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.578331470489502, |
| "rewards/margins": 12.837823867797852, |
| "rewards/rejected": -9.259491920471191, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.338204592901879, |
| "grad_norm": 0.00021632130665238947, |
| "learning_rate": 1.7881433281680297e-05, |
| "logits/chosen": 0.5508967041969299, |
| "logits/rejected": 1.1000094413757324, |
| "logps/chosen": -149.94349670410156, |
| "logps/rejected": -218.51022338867188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.072751522064209, |
| "rewards/margins": 12.319024085998535, |
| "rewards/rejected": -9.246273040771484, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.344885177453027, |
| "grad_norm": 0.003893829882144928, |
| "learning_rate": 1.752867219070912e-05, |
| "logits/chosen": 0.4527266323566437, |
| "logits/rejected": 1.2279529571533203, |
| "logps/chosen": -193.52725219726562, |
| "logps/rejected": -230.76287841796875, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5734779834747314, |
| "rewards/margins": 13.716750144958496, |
| "rewards/rejected": -10.143270492553711, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.351565762004175, |
| "grad_norm": 0.004190846811980009, |
| "learning_rate": 1.717899687041861e-05, |
| "logits/chosen": 0.4203382134437561, |
| "logits/rejected": 0.896210253238678, |
| "logps/chosen": -174.0313262939453, |
| "logps/rejected": -229.40170288085938, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.109071969985962, |
| "rewards/margins": 12.539839744567871, |
| "rewards/rejected": -9.430768013000488, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.3582463465553234, |
| "grad_norm": 0.0006010103388689458, |
| "learning_rate": 1.6832424593018145e-05, |
| "logits/chosen": 0.3124818801879883, |
| "logits/rejected": 1.187449336051941, |
| "logps/chosen": -175.65945434570312, |
| "logps/rejected": -194.64474487304688, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.353351354598999, |
| "rewards/margins": 12.400053977966309, |
| "rewards/rejected": -9.04670238494873, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.364926931106472, |
| "grad_norm": 0.0013186561409384012, |
| "learning_rate": 1.648897247744224e-05, |
| "logits/chosen": 0.5556175112724304, |
| "logits/rejected": 0.9724099636077881, |
| "logps/chosen": -175.44541931152344, |
| "logps/rejected": -247.06283569335938, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4358012676239014, |
| "rewards/margins": 13.10071086883545, |
| "rewards/rejected": -9.664909362792969, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.37160751565762, |
| "grad_norm": 0.0004016379243694246, |
| "learning_rate": 1.6148657488505116e-05, |
| "logits/chosen": 0.5522690415382385, |
| "logits/rejected": 1.1395349502563477, |
| "logps/chosen": -172.11297607421875, |
| "logps/rejected": -244.9354248046875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4882326126098633, |
| "rewards/margins": 13.11169719696045, |
| "rewards/rejected": -9.623465538024902, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.3782881002087684, |
| "grad_norm": 0.003400665707886219, |
| "learning_rate": 1.581149643606257e-05, |
| "logits/chosen": 0.4829326868057251, |
| "logits/rejected": 1.142820954322815, |
| "logps/chosen": -204.84255981445312, |
| "logps/rejected": -255.23028564453125, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.039217472076416, |
| "rewards/margins": 12.172881126403809, |
| "rewards/rejected": -9.13366413116455, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.3849686847599165, |
| "grad_norm": 0.00029609608463943005, |
| "learning_rate": 1.5477505974181858e-05, |
| "logits/chosen": 0.4647904932498932, |
| "logits/rejected": 1.0525555610656738, |
| "logps/chosen": -152.51803588867188, |
| "logps/rejected": -240.22915649414062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.57979154586792, |
| "rewards/margins": 14.005621910095215, |
| "rewards/rejected": -10.425830841064453, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.3916492693110647, |
| "grad_norm": 0.0010005651274695992, |
| "learning_rate": 1.5146702600318795e-05, |
| "logits/chosen": 0.6249891519546509, |
| "logits/rejected": 1.248299241065979, |
| "logps/chosen": -153.75282287597656, |
| "logps/rejected": -212.9418182373047, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2799882888793945, |
| "rewards/margins": 12.723299980163574, |
| "rewards/rejected": -9.44331169128418, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.398329853862213, |
| "grad_norm": 0.0012129038805142045, |
| "learning_rate": 1.4819102654503143e-05, |
| "logits/chosen": 0.5324082970619202, |
| "logits/rejected": 0.9918537139892578, |
| "logps/chosen": -194.02386474609375, |
| "logps/rejected": -250.39137268066406, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.799360513687134, |
| "rewards/margins": 13.776371955871582, |
| "rewards/rejected": -9.977011680603027, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.405010438413361, |
| "grad_norm": 0.0011025875573977828, |
| "learning_rate": 1.4494722318531272e-05, |
| "logits/chosen": 0.5399448871612549, |
| "logits/rejected": 0.688692569732666, |
| "logps/chosen": -115.42459869384766, |
| "logps/rejected": -260.16656494140625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4390218257904053, |
| "rewards/margins": 13.882972717285156, |
| "rewards/rejected": -10.443949699401855, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.411691022964509, |
| "grad_norm": 0.00040039754821918905, |
| "learning_rate": 1.4173577615167014e-05, |
| "logits/chosen": 0.5479378700256348, |
| "logits/rejected": 1.0258996486663818, |
| "logps/chosen": -148.38746643066406, |
| "logps/rejected": -235.00123596191406, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.461238384246826, |
| "rewards/margins": 12.922686576843262, |
| "rewards/rejected": -9.461446762084961, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.418371607515658, |
| "grad_norm": 0.00040944863576442003, |
| "learning_rate": 1.3855684407350087e-05, |
| "logits/chosen": 0.305247962474823, |
| "logits/rejected": 0.8850731253623962, |
| "logps/chosen": -183.35250854492188, |
| "logps/rejected": -245.67764282226562, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0578300952911377, |
| "rewards/margins": 12.985921859741211, |
| "rewards/rejected": -9.928091049194336, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.425052192066806, |
| "grad_norm": 0.0013041149359196424, |
| "learning_rate": 1.3541058397412719e-05, |
| "logits/chosen": 0.9205292463302612, |
| "logits/rejected": 1.027268409729004, |
| "logps/chosen": -139.70252990722656, |
| "logps/rejected": -251.70785522460938, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5465292930603027, |
| "rewards/margins": 14.001523971557617, |
| "rewards/rejected": -10.454995155334473, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.431732776617954, |
| "grad_norm": 0.002102568047121167, |
| "learning_rate": 1.3229715126303835e-05, |
| "logits/chosen": 0.5301153659820557, |
| "logits/rejected": 1.125875473022461, |
| "logps/chosen": -195.70343017578125, |
| "logps/rejected": -248.96484375, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5196890830993652, |
| "rewards/margins": 13.107810974121094, |
| "rewards/rejected": -9.588122367858887, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.4384133611691023, |
| "grad_norm": 0.000353420153260231, |
| "learning_rate": 1.292166997282152e-05, |
| "logits/chosen": 0.5683207511901855, |
| "logits/rejected": 1.1430654525756836, |
| "logps/chosen": -187.99229431152344, |
| "logps/rejected": -233.15504455566406, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5683434009552, |
| "rewards/margins": 12.799301147460938, |
| "rewards/rejected": -9.230957984924316, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.4450939457202505, |
| "grad_norm": 0.0007050547283142805, |
| "learning_rate": 1.2616938152853435e-05, |
| "logits/chosen": 0.7088552117347717, |
| "logits/rejected": 1.1877071857452393, |
| "logps/chosen": -175.97979736328125, |
| "logps/rejected": -242.88336181640625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2787091732025146, |
| "rewards/margins": 13.82058048248291, |
| "rewards/rejected": -10.5418701171875, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.4517745302713987, |
| "grad_norm": 0.0010725195752456784, |
| "learning_rate": 1.2315534718625082e-05, |
| "logits/chosen": 0.5767889022827148, |
| "logits/rejected": 1.0672720670700073, |
| "logps/chosen": -131.60899353027344, |
| "logps/rejected": -218.16946411132812, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5861661434173584, |
| "rewards/margins": 12.36983871459961, |
| "rewards/rejected": -8.783672332763672, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.458455114822547, |
| "grad_norm": 0.0022976077161729336, |
| "learning_rate": 1.2017474557956424e-05, |
| "logits/chosen": 0.5557492971420288, |
| "logits/rejected": 1.0859577655792236, |
| "logps/chosen": -138.25811767578125, |
| "logps/rejected": -223.6555633544922, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.32340669631958, |
| "rewards/margins": 12.937990188598633, |
| "rewards/rejected": -9.614583015441895, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.465135699373695, |
| "grad_norm": 0.0001732637465465814, |
| "learning_rate": 1.1722772393526402e-05, |
| "logits/chosen": 0.3511442542076111, |
| "logits/rejected": 1.180907130241394, |
| "logps/chosen": -212.04290771484375, |
| "logps/rejected": -232.52906799316406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6756913661956787, |
| "rewards/margins": 14.446080207824707, |
| "rewards/rejected": -10.770389556884766, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.471816283924843, |
| "grad_norm": 0.0014034606283530593, |
| "learning_rate": 1.1431442782145878e-05, |
| "logits/chosen": 0.4913908839225769, |
| "logits/rejected": 1.1496714353561401, |
| "logps/chosen": -186.36854553222656, |
| "logps/rejected": -201.04025268554688, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4986705780029297, |
| "rewards/margins": 12.48499870300293, |
| "rewards/rejected": -8.986328125, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.478496868475992, |
| "grad_norm": 0.005758744198828936, |
| "learning_rate": 1.1143500114038335e-05, |
| "logits/chosen": 0.4534023106098175, |
| "logits/rejected": 1.1205188035964966, |
| "logps/chosen": -162.62062072753906, |
| "logps/rejected": -195.78395080566406, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2878878116607666, |
| "rewards/margins": 13.010221481323242, |
| "rewards/rejected": -9.722332954406738, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.48517745302714, |
| "grad_norm": 0.006383196916431189, |
| "learning_rate": 1.0858958612129346e-05, |
| "logits/chosen": 0.4629024565219879, |
| "logits/rejected": 1.1753510236740112, |
| "logps/chosen": -151.94247436523438, |
| "logps/rejected": -198.45919799804688, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5583202838897705, |
| "rewards/margins": 13.165674209594727, |
| "rewards/rejected": -9.607353210449219, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.491858037578288, |
| "grad_norm": 0.00029526828438974917, |
| "learning_rate": 1.0577832331343835e-05, |
| "logits/chosen": 0.46336808800697327, |
| "logits/rejected": 0.9336625933647156, |
| "logps/chosen": -151.4046630859375, |
| "logps/rejected": -246.76052856445312, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5535144805908203, |
| "rewards/margins": 14.210418701171875, |
| "rewards/rejected": -10.656903266906738, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.4985386221294363, |
| "grad_norm": 0.001064629526808858, |
| "learning_rate": 1.0300135157911985e-05, |
| "logits/chosen": 0.3790948688983917, |
| "logits/rejected": 1.0435997247695923, |
| "logps/chosen": -197.76539611816406, |
| "logps/rejected": -259.93133544921875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1729063987731934, |
| "rewards/margins": 13.631752967834473, |
| "rewards/rejected": -10.45884895324707, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.5052192066805845, |
| "grad_norm": 0.00039556881529279053, |
| "learning_rate": 1.0025880808683133e-05, |
| "logits/chosen": 0.7378631830215454, |
| "logits/rejected": 1.0038983821868896, |
| "logps/chosen": -138.7704315185547, |
| "logps/rejected": -267.25146484375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.345494270324707, |
| "rewards/margins": 13.454019546508789, |
| "rewards/rejected": -10.108526229858398, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.5118997912317327, |
| "grad_norm": 0.0034411298111081123, |
| "learning_rate": 9.755082830448477e-06, |
| "logits/chosen": 0.6844637393951416, |
| "logits/rejected": 1.1692044734954834, |
| "logps/chosen": -152.147216796875, |
| "logps/rejected": -240.3649139404297, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.52048397064209, |
| "rewards/margins": 13.183599472045898, |
| "rewards/rejected": -9.663114547729492, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.518580375782881, |
| "grad_norm": 0.005600621923804283, |
| "learning_rate": 9.487754599271714e-06, |
| "logits/chosen": 0.5809310674667358, |
| "logits/rejected": 1.2069010734558105, |
| "logps/chosen": -136.8238067626953, |
| "logps/rejected": -205.76683044433594, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.8006017208099365, |
| "rewards/margins": 13.183314323425293, |
| "rewards/rejected": -9.382713317871094, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.5252609603340295, |
| "grad_norm": 0.0022874092683196068, |
| "learning_rate": 9.223909319828448e-06, |
| "logits/chosen": 0.2795962691307068, |
| "logits/rejected": 0.9856559634208679, |
| "logps/chosen": -157.48626708984375, |
| "logps/rejected": -198.1659698486328, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.247584342956543, |
| "rewards/margins": 11.768815994262695, |
| "rewards/rejected": -8.521230697631836, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.5319415448851776, |
| "grad_norm": 0.0010204276768490672, |
| "learning_rate": 8.96356002475388e-06, |
| "logits/chosen": 0.4792221188545227, |
| "logits/rejected": 1.0078630447387695, |
| "logps/chosen": -155.52247619628906, |
| "logps/rejected": -202.2192840576172, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.309563159942627, |
| "rewards/margins": 12.788346290588379, |
| "rewards/rejected": -9.478784561157227, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.538622129436326, |
| "grad_norm": 0.002153388923034072, |
| "learning_rate": 8.706719573999166e-06, |
| "logits/chosen": 0.697721540927887, |
| "logits/rejected": 1.042283058166504, |
| "logps/chosen": -162.91702270507812, |
| "logps/rejected": -281.65045166015625, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.598088502883911, |
| "rewards/margins": 14.490654945373535, |
| "rewards/rejected": -10.89256763458252, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.545302713987474, |
| "grad_norm": 0.0033939930144697428, |
| "learning_rate": 8.45340065419606e-06, |
| "logits/chosen": 0.2647485136985779, |
| "logits/rejected": 1.3145473003387451, |
| "logps/chosen": -185.42364501953125, |
| "logps/rejected": -169.73617553710938, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.510795831680298, |
| "rewards/margins": 11.805034637451172, |
| "rewards/rejected": -8.294239044189453, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.551983298538622, |
| "grad_norm": 0.0015864988090470433, |
| "learning_rate": 8.203615778030358e-06, |
| "logits/chosen": 0.3910033106803894, |
| "logits/rejected": 1.1290589570999146, |
| "logps/chosen": -173.88612365722656, |
| "logps/rejected": -196.45225524902344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.66280460357666, |
| "rewards/margins": 12.923318862915039, |
| "rewards/rejected": -9.260513305664062, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.5586638830897703, |
| "grad_norm": 0.0010147824650630355, |
| "learning_rate": 7.957377283623775e-06, |
| "logits/chosen": 0.60319983959198, |
| "logits/rejected": 1.0238069295883179, |
| "logps/chosen": -127.49191284179688, |
| "logps/rejected": -210.27113342285156, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0743024349212646, |
| "rewards/margins": 12.275171279907227, |
| "rewards/rejected": -9.200868606567383, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.5653444676409185, |
| "grad_norm": 0.01197250746190548, |
| "learning_rate": 7.71469733392456e-06, |
| "logits/chosen": 0.37566909193992615, |
| "logits/rejected": 1.2771214246749878, |
| "logps/chosen": -190.87774658203125, |
| "logps/rejected": -219.00123596191406, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.659575939178467, |
| "rewards/margins": 13.514084815979004, |
| "rewards/rejected": -9.854509353637695, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.5720250521920667, |
| "grad_norm": 0.0003030757943633944, |
| "learning_rate": 7.475587916106674e-06, |
| "logits/chosen": 0.5149967670440674, |
| "logits/rejected": 1.0121179819107056, |
| "logps/chosen": -154.53530883789062, |
| "logps/rejected": -224.7379913330078, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.215111017227173, |
| "rewards/margins": 12.857205390930176, |
| "rewards/rejected": -9.642094612121582, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.578705636743215, |
| "grad_norm": 0.014440486207604408, |
| "learning_rate": 7.240060840977654e-06, |
| "logits/chosen": 0.5663985013961792, |
| "logits/rejected": 1.17019784450531, |
| "logps/chosen": -182.380615234375, |
| "logps/rejected": -235.32057189941406, |
| "loss": 0.0009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6025519371032715, |
| "rewards/margins": 12.833649635314941, |
| "rewards/rejected": -9.231098175048828, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.585386221294363, |
| "grad_norm": 0.0010652203345671296, |
| "learning_rate": 7.008127742395339e-06, |
| "logits/chosen": 0.7321959137916565, |
| "logits/rejected": 1.1040430068969727, |
| "logps/chosen": -130.3069610595703, |
| "logps/rejected": -207.42813110351562, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.5689282417297363, |
| "rewards/margins": 12.16445255279541, |
| "rewards/rejected": -9.595523834228516, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.5920668058455116, |
| "grad_norm": 0.0002396242634858936, |
| "learning_rate": 6.779800076692989e-06, |
| "logits/chosen": 0.557038426399231, |
| "logits/rejected": 1.1275995969772339, |
| "logps/chosen": -152.3614959716797, |
| "logps/rejected": -220.40699768066406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3268535137176514, |
| "rewards/margins": 13.34984302520752, |
| "rewards/rejected": -10.022990226745605, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.59874739039666, |
| "grad_norm": 0.0006751392502337694, |
| "learning_rate": 6.555089122113671e-06, |
| "logits/chosen": 0.5842954516410828, |
| "logits/rejected": 1.438590407371521, |
| "logps/chosen": -168.39236450195312, |
| "logps/rejected": -195.80142211914062, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2229514122009277, |
| "rewards/margins": 12.052131652832031, |
| "rewards/rejected": -8.829178810119629, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.605427974947808, |
| "grad_norm": 0.00407389784231782, |
| "learning_rate": 6.334005978252968e-06, |
| "logits/chosen": 0.6431280374526978, |
| "logits/rejected": 1.1001299619674683, |
| "logps/chosen": -151.1575469970703, |
| "logps/rejected": -224.49334716796875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.211059331893921, |
| "rewards/margins": 12.562589645385742, |
| "rewards/rejected": -9.351531028747559, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.612108559498956, |
| "grad_norm": 0.00046424585161730647, |
| "learning_rate": 6.116561565510806e-06, |
| "logits/chosen": 0.6559157371520996, |
| "logits/rejected": 0.8570265173912048, |
| "logps/chosen": -117.59468841552734, |
| "logps/rejected": -238.4303436279297, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.9642975330352783, |
| "rewards/margins": 11.914740562438965, |
| "rewards/rejected": -8.95044231414795, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.6187891440501043, |
| "grad_norm": 0.000544650130905211, |
| "learning_rate": 5.902766624551994e-06, |
| "logits/chosen": 0.4319482445716858, |
| "logits/rejected": 1.1018319129943848, |
| "logps/chosen": -171.65264892578125, |
| "logps/rejected": -213.88772583007812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3273277282714844, |
| "rewards/margins": 12.443902015686035, |
| "rewards/rejected": -9.11657428741455, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.6254697286012525, |
| "grad_norm": 0.0002654242271091789, |
| "learning_rate": 5.6926317157757825e-06, |
| "logits/chosen": 0.29998600482940674, |
| "logits/rejected": 1.0530532598495483, |
| "logps/chosen": -153.1922149658203, |
| "logps/rejected": -202.36703491210938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.9652388095855713, |
| "rewards/margins": 13.307126998901367, |
| "rewards/rejected": -9.341888427734375, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.632150313152401, |
| "grad_norm": 0.014262525364756584, |
| "learning_rate": 5.486167218794069e-06, |
| "logits/chosen": 0.49952131509780884, |
| "logits/rejected": 1.1781392097473145, |
| "logps/chosen": -181.53782653808594, |
| "logps/rejected": -179.46331787109375, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.316577911376953, |
| "rewards/margins": 12.234426498413086, |
| "rewards/rejected": -8.917847633361816, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.6388308977035493, |
| "grad_norm": 0.013617642223834991, |
| "learning_rate": 5.283383331918872e-06, |
| "logits/chosen": 0.6029332280158997, |
| "logits/rejected": 1.200974464416504, |
| "logps/chosen": -167.70443725585938, |
| "logps/rejected": -210.56065368652344, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7498412132263184, |
| "rewards/margins": 12.447210311889648, |
| "rewards/rejected": -8.697368621826172, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.6455114822546975, |
| "grad_norm": 0.01435225922614336, |
| "learning_rate": 5.084290071658462e-06, |
| "logits/chosen": 0.39357277750968933, |
| "logits/rejected": 1.2007172107696533, |
| "logps/chosen": -171.33279418945312, |
| "logps/rejected": -187.95843505859375, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.256293535232544, |
| "rewards/margins": 12.046555519104004, |
| "rewards/rejected": -8.790261268615723, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.6521920668058456, |
| "grad_norm": 0.002334900200366974, |
| "learning_rate": 4.888897272222677e-06, |
| "logits/chosen": 0.3714907169342041, |
| "logits/rejected": 1.1416910886764526, |
| "logps/chosen": -168.70376586914062, |
| "logps/rejected": -207.44976806640625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.877674102783203, |
| "rewards/margins": 12.441089630126953, |
| "rewards/rejected": -8.563414573669434, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.658872651356994, |
| "grad_norm": 0.001730743213556707, |
| "learning_rate": 4.697214585037087e-06, |
| "logits/chosen": 0.5332534313201904, |
| "logits/rejected": 1.249335765838623, |
| "logps/chosen": -158.67762756347656, |
| "logps/rejected": -190.1533203125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0616326332092285, |
| "rewards/margins": 12.478856086730957, |
| "rewards/rejected": -9.417223930358887, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.665553235908142, |
| "grad_norm": 0.003226222237572074, |
| "learning_rate": 4.5092514782663255e-06, |
| "logits/chosen": 0.6613823771476746, |
| "logits/rejected": 1.0511373281478882, |
| "logps/chosen": -144.17306518554688, |
| "logps/rejected": -228.23768615722656, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.6547493934631348, |
| "rewards/margins": 12.782587051391602, |
| "rewards/rejected": -9.127838134765625, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.67223382045929, |
| "grad_norm": 0.0005425635608844459, |
| "learning_rate": 4.325017236346378e-06, |
| "logits/chosen": 0.5887613296508789, |
| "logits/rejected": 1.0347230434417725, |
| "logps/chosen": -158.33848571777344, |
| "logps/rejected": -240.96356201171875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1818087100982666, |
| "rewards/margins": 13.404643058776855, |
| "rewards/rejected": -10.222835540771484, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.6789144050104383, |
| "grad_norm": 0.00024664445663802326, |
| "learning_rate": 4.144520959525959e-06, |
| "logits/chosen": 0.5648880004882812, |
| "logits/rejected": 0.8261204957962036, |
| "logps/chosen": -149.58128356933594, |
| "logps/rejected": -245.0674591064453, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.261422872543335, |
| "rewards/margins": 12.966839790344238, |
| "rewards/rejected": -9.70541763305664, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.6855949895615865, |
| "grad_norm": 0.0005279434262774885, |
| "learning_rate": 3.967771563417096e-06, |
| "logits/chosen": 0.7538788914680481, |
| "logits/rejected": 1.1771303415298462, |
| "logps/chosen": -135.6904754638672, |
| "logps/rejected": -227.23272705078125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.337376832962036, |
| "rewards/margins": 13.063421249389648, |
| "rewards/rejected": -9.726044654846191, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.6922755741127347, |
| "grad_norm": 0.00802143756300211, |
| "learning_rate": 3.794777778554615e-06, |
| "logits/chosen": 0.4799140691757202, |
| "logits/rejected": 1.0762194395065308, |
| "logps/chosen": -169.96649169921875, |
| "logps/rejected": -227.5594940185547, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.182255744934082, |
| "rewards/margins": 12.846413612365723, |
| "rewards/rejected": -9.66415786743164, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.698956158663883, |
| "grad_norm": 0.00045062805293127894, |
| "learning_rate": 3.6255481499649725e-06, |
| "logits/chosen": 0.4726150333881378, |
| "logits/rejected": 1.337223768234253, |
| "logps/chosen": -163.80133056640625, |
| "logps/rejected": -204.79391479492188, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.530905246734619, |
| "rewards/margins": 12.308239936828613, |
| "rewards/rejected": -8.777335166931152, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.7056367432150314, |
| "grad_norm": 0.0015004087472334504, |
| "learning_rate": 3.460091036744162e-06, |
| "logits/chosen": 0.27585047483444214, |
| "logits/rejected": 1.1584709882736206, |
| "logps/chosen": -174.3064422607422, |
| "logps/rejected": -199.953125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2562003135681152, |
| "rewards/margins": 12.504425048828125, |
| "rewards/rejected": -9.248224258422852, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.7123173277661796, |
| "grad_norm": 0.00020969973411411047, |
| "learning_rate": 3.2984146116448447e-06, |
| "logits/chosen": 0.5917679667472839, |
| "logits/rejected": 1.226263403892517, |
| "logps/chosen": -160.92601013183594, |
| "logps/rejected": -188.79367065429688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2227373123168945, |
| "rewards/margins": 12.113557815551758, |
| "rewards/rejected": -8.890820503234863, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.718997912317328, |
| "grad_norm": 0.000391695968573913, |
| "learning_rate": 3.140526860672557e-06, |
| "logits/chosen": 0.3402010500431061, |
| "logits/rejected": 0.9619426727294922, |
| "logps/chosen": -139.92054748535156, |
| "logps/rejected": -229.22862243652344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.010129928588867, |
| "rewards/margins": 13.08873176574707, |
| "rewards/rejected": -10.078601837158203, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.725678496868476, |
| "grad_norm": 0.0002957108954433352, |
| "learning_rate": 2.9864355826913873e-06, |
| "logits/chosen": 0.6995745897293091, |
| "logits/rejected": 0.9052819013595581, |
| "logps/chosen": -123.17851257324219, |
| "logps/rejected": -251.5882568359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.108473777770996, |
| "rewards/margins": 12.585604667663574, |
| "rewards/rejected": -9.477129936218262, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.732359081419624, |
| "grad_norm": 0.0005900960532017052, |
| "learning_rate": 2.83614838903862e-06, |
| "logits/chosen": 0.4034777283668518, |
| "logits/rejected": 1.2409098148345947, |
| "logps/chosen": -199.7462158203125, |
| "logps/rejected": -198.4483642578125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5001864433288574, |
| "rewards/margins": 12.815964698791504, |
| "rewards/rejected": -9.315777778625488, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.7390396659707723, |
| "grad_norm": 0.0002275588922202587, |
| "learning_rate": 2.689672703148869e-06, |
| "logits/chosen": 0.4764283299446106, |
| "logits/rejected": 1.1419066190719604, |
| "logps/chosen": -169.3687744140625, |
| "logps/rejected": -220.18045043945312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3647985458374023, |
| "rewards/margins": 13.468372344970703, |
| "rewards/rejected": -10.1035737991333, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.745720250521921, |
| "grad_norm": 0.0013009293470531702, |
| "learning_rate": 2.5470157601873035e-06, |
| "logits/chosen": 0.36611929535865784, |
| "logits/rejected": 1.0027097463607788, |
| "logps/chosen": -189.9754638671875, |
| "logps/rejected": -249.90679931640625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5394551753997803, |
| "rewards/margins": 13.063716888427734, |
| "rewards/rejected": -9.524261474609375, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.752400835073069, |
| "grad_norm": 0.0015072039095684886, |
| "learning_rate": 2.4081846066923697e-06, |
| "logits/chosen": 0.4824734628200531, |
| "logits/rejected": 1.1585693359375, |
| "logps/chosen": -161.98866271972656, |
| "logps/rejected": -216.73757934570312, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2364959716796875, |
| "rewards/margins": 13.164081573486328, |
| "rewards/rejected": -9.92758560180664, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.7590814196242173, |
| "grad_norm": 0.0006356340018101037, |
| "learning_rate": 2.273186100227651e-06, |
| "logits/chosen": 0.564692497253418, |
| "logits/rejected": 1.2192484140396118, |
| "logps/chosen": -133.49551391601562, |
| "logps/rejected": -193.18711853027344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1987099647521973, |
| "rewards/margins": 11.772148132324219, |
| "rewards/rejected": -8.573437690734863, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.7657620041753654, |
| "grad_norm": 0.00035454286262393, |
| "learning_rate": 2.1420269090431712e-06, |
| "logits/chosen": 0.42993149161338806, |
| "logits/rejected": 1.0948328971862793, |
| "logps/chosen": -169.48936462402344, |
| "logps/rejected": -220.0684814453125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.176632881164551, |
| "rewards/margins": 12.51456069946289, |
| "rewards/rejected": -9.33792781829834, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.7724425887265136, |
| "grad_norm": 0.0007007503882050514, |
| "learning_rate": 2.0147135117460204e-06, |
| "logits/chosen": 0.38136231899261475, |
| "logits/rejected": 1.3104112148284912, |
| "logps/chosen": -161.56480407714844, |
| "logps/rejected": -181.53868103027344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7145438194274902, |
| "rewards/margins": 13.201973915100098, |
| "rewards/rejected": -9.48742961883545, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.779123173277662, |
| "grad_norm": 0.0013630822068080306, |
| "learning_rate": 1.891252196980311e-06, |
| "logits/chosen": 0.724543035030365, |
| "logits/rejected": 1.2720431089401245, |
| "logps/chosen": -174.0122833251953, |
| "logps/rejected": -216.48367309570312, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.654141664505005, |
| "rewards/margins": 13.36253547668457, |
| "rewards/rejected": -9.708393096923828, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.78580375782881, |
| "grad_norm": 0.0008407873101532459, |
| "learning_rate": 1.7716490631165984e-06, |
| "logits/chosen": 0.5323563814163208, |
| "logits/rejected": 1.2073959112167358, |
| "logps/chosen": -149.6080322265625, |
| "logps/rejected": -223.80166625976562, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5268023014068604, |
| "rewards/margins": 12.292684555053711, |
| "rewards/rejected": -8.76588249206543, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.792484342379958, |
| "grad_norm": 0.00027841454721055925, |
| "learning_rate": 1.6559100179506015e-06, |
| "logits/chosen": 0.46936628222465515, |
| "logits/rejected": 1.0593383312225342, |
| "logps/chosen": -169.1297607421875, |
| "logps/rejected": -212.625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.026738166809082, |
| "rewards/margins": 12.370702743530273, |
| "rewards/rejected": -9.343963623046875, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.7991649269311063, |
| "grad_norm": 0.0007251783972606063, |
| "learning_rate": 1.5440407784114285e-06, |
| "logits/chosen": 0.5382225513458252, |
| "logits/rejected": 1.2113491296768188, |
| "logps/chosen": -153.16236877441406, |
| "logps/rejected": -216.06365966796875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2709617614746094, |
| "rewards/margins": 12.992986679077148, |
| "rewards/rejected": -9.722025871276855, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.8058455114822545, |
| "grad_norm": 0.000872993899974972, |
| "learning_rate": 1.4360468702791885e-06, |
| "logits/chosen": 0.6498620510101318, |
| "logits/rejected": 1.0502396821975708, |
| "logps/chosen": -168.02088928222656, |
| "logps/rejected": -243.26052856445312, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.300208568572998, |
| "rewards/margins": 13.005234718322754, |
| "rewards/rejected": -9.705026626586914, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.812526096033403, |
| "grad_norm": 0.00035026189289055765, |
| "learning_rate": 1.3319336279119832e-06, |
| "logits/chosen": 0.5399425029754639, |
| "logits/rejected": 1.0422205924987793, |
| "logps/chosen": -145.86790466308594, |
| "logps/rejected": -231.82078552246094, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.275618076324463, |
| "rewards/margins": 12.443395614624023, |
| "rewards/rejected": -9.167777061462402, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.8192066805845513, |
| "grad_norm": 0.00047282990999519825, |
| "learning_rate": 1.2317061939825092e-06, |
| "logits/chosen": 0.6304022669792175, |
| "logits/rejected": 0.9782839417457581, |
| "logps/chosen": -132.8438262939453, |
| "logps/rejected": -230.8707275390625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1279447078704834, |
| "rewards/margins": 12.440394401550293, |
| "rewards/rejected": -9.312448501586914, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.8258872651356994, |
| "grad_norm": 0.0015838721301406622, |
| "learning_rate": 1.1353695192239767e-06, |
| "logits/chosen": 0.34126338362693787, |
| "logits/rejected": 0.872474193572998, |
| "logps/chosen": -150.72293090820312, |
| "logps/rejected": -219.77139282226562, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.3702316284179688, |
| "rewards/margins": 13.250913619995117, |
| "rewards/rejected": -9.880681037902832, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.8325678496868476, |
| "grad_norm": 0.0009925226913765073, |
| "learning_rate": 1.042928362185558e-06, |
| "logits/chosen": 0.5055323243141174, |
| "logits/rejected": 1.2043384313583374, |
| "logps/chosen": -165.8092041015625, |
| "logps/rejected": -205.6092529296875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0547263622283936, |
| "rewards/margins": 12.708995819091797, |
| "rewards/rejected": -9.654268264770508, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.8392484342379958, |
| "grad_norm": 0.0010521183721721172, |
| "learning_rate": 9.543872889974027e-07, |
| "logits/chosen": 0.6694621443748474, |
| "logits/rejected": 1.0147770643234253, |
| "logps/chosen": -141.67022705078125, |
| "logps/rejected": -271.6776428222656, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1093451976776123, |
| "rewards/margins": 13.3922119140625, |
| "rewards/rejected": -10.282866477966309, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.845929018789144, |
| "grad_norm": 0.0004929823335260153, |
| "learning_rate": 8.697506731450222e-07, |
| "logits/chosen": 0.36508888006210327, |
| "logits/rejected": 1.2144858837127686, |
| "logps/chosen": -174.45252990722656, |
| "logps/rejected": -199.98072814941406, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2696728706359863, |
| "rewards/margins": 12.374869346618652, |
| "rewards/rejected": -9.105195999145508, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.852609603340292, |
| "grad_norm": 0.0011409110156819224, |
| "learning_rate": 7.890226952532942e-07, |
| "logits/chosen": 0.38490644097328186, |
| "logits/rejected": 1.3086551427841187, |
| "logps/chosen": -209.907470703125, |
| "logps/rejected": -200.3457489013672, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.552093982696533, |
| "rewards/margins": 12.373093605041504, |
| "rewards/rejected": -8.820999145507812, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.8592901878914407, |
| "grad_norm": 0.0007044864469207823, |
| "learning_rate": 7.122073428799781e-07, |
| "logits/chosen": 0.5145556926727295, |
| "logits/rejected": 1.0260547399520874, |
| "logps/chosen": -135.60379028320312, |
| "logps/rejected": -226.86810302734375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.268608570098877, |
| "rewards/margins": 13.123603820800781, |
| "rewards/rejected": -9.854996681213379, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.865970772442589, |
| "grad_norm": 0.0031695598736405373, |
| "learning_rate": 6.393084103187264e-07, |
| "logits/chosen": 0.532058596611023, |
| "logits/rejected": 0.8780273795127869, |
| "logps/chosen": -164.82818603515625, |
| "logps/rejected": -250.14654541015625, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.7977402210235596, |
| "rewards/margins": 13.179388046264648, |
| "rewards/rejected": -9.381647109985352, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.872651356993737, |
| "grad_norm": 0.0007318177376873791, |
| "learning_rate": 5.703294984116525e-07, |
| "logits/chosen": 0.3651660084724426, |
| "logits/rejected": 0.9196643829345703, |
| "logps/chosen": -173.27577209472656, |
| "logps/rejected": -234.96435546875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.602478504180908, |
| "rewards/margins": 13.61583423614502, |
| "rewards/rejected": -10.013355255126953, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.8793319415448853, |
| "grad_norm": 0.0004672574286814779, |
| "learning_rate": 5.052740143714996e-07, |
| "logits/chosen": 0.3767249882221222, |
| "logits/rejected": 0.877831757068634, |
| "logps/chosen": -180.11639404296875, |
| "logps/rejected": -272.79217529296875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4903178215026855, |
| "rewards/margins": 13.622541427612305, |
| "rewards/rejected": -10.132223129272461, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.8860125260960334, |
| "grad_norm": 0.006710366811603308, |
| "learning_rate": 4.441451716133216e-07, |
| "logits/chosen": 0.3890739977359772, |
| "logits/rejected": 1.1784958839416504, |
| "logps/chosen": -192.0306396484375, |
| "logps/rejected": -246.87294006347656, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.150397777557373, |
| "rewards/margins": 13.305784225463867, |
| "rewards/rejected": -10.15538501739502, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.8926931106471816, |
| "grad_norm": 0.0009442153386771679, |
| "learning_rate": 3.8694598959575725e-07, |
| "logits/chosen": 0.49244481325149536, |
| "logits/rejected": 1.0682514905929565, |
| "logps/chosen": -193.1286163330078, |
| "logps/rejected": -267.5054626464844, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.750030040740967, |
| "rewards/margins": 13.207242012023926, |
| "rewards/rejected": -9.457212448120117, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.8993736951983298, |
| "grad_norm": 0.0028639482334256172, |
| "learning_rate": 3.3367929367190463e-07, |
| "logits/chosen": 0.610533595085144, |
| "logits/rejected": 1.2630019187927246, |
| "logps/chosen": -129.1243438720703, |
| "logps/rejected": -186.8758544921875, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5877702236175537, |
| "rewards/margins": 12.923406600952148, |
| "rewards/rejected": -9.335637092590332, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.906054279749478, |
| "grad_norm": 0.0031200747471302748, |
| "learning_rate": 2.843477149497265e-07, |
| "logits/chosen": 0.7550321817398071, |
| "logits/rejected": 0.9643117189407349, |
| "logps/chosen": -83.94383239746094, |
| "logps/rejected": -206.9544219970703, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.38393497467041, |
| "rewards/margins": 12.600971221923828, |
| "rewards/rejected": -9.217036247253418, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.912734864300626, |
| "grad_norm": 0.0013908625114709139, |
| "learning_rate": 2.3895369016211813e-07, |
| "logits/chosen": 0.6865592002868652, |
| "logits/rejected": 0.957063615322113, |
| "logps/chosen": -148.91688537597656, |
| "logps/rejected": -231.92611694335938, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.5575623512268066, |
| "rewards/margins": 13.301969528198242, |
| "rewards/rejected": -9.744406700134277, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.9194154488517743, |
| "grad_norm": 0.04500668868422508, |
| "learning_rate": 1.9749946154651534e-07, |
| "logits/chosen": 0.4031934142112732, |
| "logits/rejected": 0.901584267616272, |
| "logps/chosen": -159.9291229248047, |
| "logps/rejected": -242.07891845703125, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2847237586975098, |
| "rewards/margins": 12.680036544799805, |
| "rewards/rejected": -9.395313262939453, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.926096033402923, |
| "grad_norm": 0.000297738763038069, |
| "learning_rate": 1.5998707673419156e-07, |
| "logits/chosen": 0.6217765808105469, |
| "logits/rejected": 1.300815463066101, |
| "logps/chosen": -175.9449920654297, |
| "logps/rejected": -220.9844207763672, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2398884296417236, |
| "rewards/margins": 12.802839279174805, |
| "rewards/rejected": -9.562949180603027, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.932776617954071, |
| "grad_norm": 0.007137789856642485, |
| "learning_rate": 1.2641838864905887e-07, |
| "logits/chosen": 0.5026199817657471, |
| "logits/rejected": 1.145331621170044, |
| "logps/chosen": -209.8468017578125, |
| "logps/rejected": -262.7084045410156, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.754572868347168, |
| "rewards/margins": 13.609201431274414, |
| "rewards/rejected": -9.854629516601562, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.9394572025052192, |
| "grad_norm": 0.00042287795804440975, |
| "learning_rate": 9.679505541615008e-08, |
| "logits/chosen": 0.3595789670944214, |
| "logits/rejected": 1.4266026020050049, |
| "logps/chosen": -190.64529418945312, |
| "logps/rejected": -162.8024139404297, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.641660451889038, |
| "rewards/margins": 11.447403907775879, |
| "rewards/rejected": -7.80574369430542, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.9461377870563674, |
| "grad_norm": 0.0004641091509256512, |
| "learning_rate": 7.11185402797554e-08, |
| "logits/chosen": 0.4248248338699341, |
| "logits/rejected": 0.8252262473106384, |
| "logps/chosen": -162.13511657714844, |
| "logps/rejected": -238.3690948486328, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.014636516571045, |
| "rewards/margins": 13.756202697753906, |
| "rewards/rejected": -9.741565704345703, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.9528183716075156, |
| "grad_norm": 0.00071021041367203, |
| "learning_rate": 4.9390111531115724e-08, |
| "logits/chosen": 0.8295226097106934, |
| "logits/rejected": 1.1506669521331787, |
| "logps/chosen": -161.70565795898438, |
| "logps/rejected": -239.46661376953125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.0058834552764893, |
| "rewards/margins": 13.02005386352539, |
| "rewards/rejected": -10.01417064666748, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.9594989561586638, |
| "grad_norm": 0.0019515601452440023, |
| "learning_rate": 3.1610842445761736e-08, |
| "logits/chosen": 0.4108167588710785, |
| "logits/rejected": 1.0816458463668823, |
| "logps/chosen": -161.15087890625, |
| "logps/rejected": -216.76573181152344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.153501272201538, |
| "rewards/margins": 12.347149848937988, |
| "rewards/rejected": -9.193648338317871, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.9661795407098124, |
| "grad_norm": 0.0006106442306190729, |
| "learning_rate": 1.7781611230551775e-08, |
| "logits/chosen": 0.5510632991790771, |
| "logits/rejected": 1.0120404958724976, |
| "logps/chosen": -161.36676025390625, |
| "logps/rejected": -249.424560546875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.663705825805664, |
| "rewards/margins": 13.109761238098145, |
| "rewards/rejected": -9.44605541229248, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.9728601252609606, |
| "grad_norm": 0.0006236585322767496, |
| "learning_rate": 7.903100980222178e-09, |
| "logits/chosen": 0.658979594707489, |
| "logits/rejected": 1.351822018623352, |
| "logps/chosen": -174.4720458984375, |
| "logps/rejected": -205.27960205078125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.2610790729522705, |
| "rewards/margins": 12.586731910705566, |
| "rewards/rejected": -9.325651168823242, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.9795407098121087, |
| "grad_norm": 0.00028068042593076825, |
| "learning_rate": 1.975799643707532e-09, |
| "logits/chosen": 0.5611605048179626, |
| "logits/rejected": 1.0852856636047363, |
| "logps/chosen": -155.2922821044922, |
| "logps/rejected": -229.93069458007812, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4737355709075928, |
| "rewards/margins": 12.924980163574219, |
| "rewards/rejected": -9.451244354248047, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.986221294363257, |
| "grad_norm": 0.007568780332803726, |
| "learning_rate": 0.0, |
| "logits/chosen": 0.4653575122356415, |
| "logits/rejected": 1.259974718093872, |
| "logps/chosen": -172.01187133789062, |
| "logps/rejected": -166.48036193847656, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.4418039321899414, |
| "rewards/margins": 11.374272346496582, |
| "rewards/rejected": -7.932469367980957, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.986221294363257, |
| "step": 447, |
| "total_flos": 0.0, |
| "train_loss": 0.015126691275923332, |
| "train_runtime": 9353.1303, |
| "train_samples_per_second": 6.145, |
| "train_steps_per_second": 0.048 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 447, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|