| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9996020692399522, | |
| "eval_steps": 500, | |
| "global_step": 1884, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.645502645502645e-09, | |
| "logits/chosen": -1.8052858114242554, | |
| "logits/rejected": -1.8250553607940674, | |
| "logps/chosen": -201.6904296875, | |
| "logps/rejected": -206.93157958984375, | |
| "loss": 7734.375, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "rewards/safe_rewards": 0.0, | |
| "rewards/unsafe_rewards": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.6455026455026453e-08, | |
| "logits/chosen": -2.025691032409668, | |
| "logits/rejected": -1.8649556636810303, | |
| "logps/chosen": -270.43963623046875, | |
| "logps/rejected": -169.98423767089844, | |
| "loss": 7727.0087, | |
| "rewards/accuracies": 0.4027777910232544, | |
| "rewards/chosen": 4.114356852369383e-05, | |
| "rewards/margins": -0.0002653732954058796, | |
| "rewards/rejected": 0.00030651676934212446, | |
| "rewards/safe_rewards": -1.17086410682532e-05, | |
| "rewards/unsafe_rewards": -0.0006500756135210395, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5.2910052910052905e-08, | |
| "logits/chosen": -1.961146593093872, | |
| "logits/rejected": -1.873740553855896, | |
| "logps/chosen": -189.17404174804688, | |
| "logps/rejected": -176.31651306152344, | |
| "loss": 7718.007, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -6.340327672660351e-06, | |
| "rewards/margins": -0.00010152898175874725, | |
| "rewards/rejected": 9.518869046587497e-05, | |
| "rewards/safe_rewards": 0.00045737033360637724, | |
| "rewards/unsafe_rewards": -8.718876051716506e-05, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 7.936507936507936e-08, | |
| "logits/chosen": -1.9912703037261963, | |
| "logits/rejected": -1.883933424949646, | |
| "logps/chosen": -198.4538116455078, | |
| "logps/rejected": -183.28781127929688, | |
| "loss": 7515.9359, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.0001133469631895423, | |
| "rewards/margins": 0.0007399408495984972, | |
| "rewards/rejected": -0.0006265938864089549, | |
| "rewards/safe_rewards": 0.00022509883274324238, | |
| "rewards/unsafe_rewards": 0.0002071214112220332, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.0582010582010581e-07, | |
| "logits/chosen": -1.927167534828186, | |
| "logits/rejected": -1.8453724384307861, | |
| "logps/chosen": -198.85276794433594, | |
| "logps/rejected": -174.22967529296875, | |
| "loss": 7334.5094, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.00027468582266010344, | |
| "rewards/margins": 0.0014765586238354445, | |
| "rewards/rejected": -0.0012018729466944933, | |
| "rewards/safe_rewards": 0.0002533269871491939, | |
| "rewards/unsafe_rewards": 0.00015336349315475672, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.3227513227513225e-07, | |
| "logits/chosen": -2.037893533706665, | |
| "logits/rejected": -1.8426322937011719, | |
| "logps/chosen": -214.9281463623047, | |
| "logps/rejected": -162.3707733154297, | |
| "loss": 7399.5859, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.0017435807967558503, | |
| "rewards/margins": 0.001902301562950015, | |
| "rewards/rejected": -0.00015872062067501247, | |
| "rewards/safe_rewards": 0.002309921896085143, | |
| "rewards/unsafe_rewards": 0.00044932105811312795, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.5873015873015872e-07, | |
| "logits/chosen": -2.011747360229492, | |
| "logits/rejected": -1.8823707103729248, | |
| "logps/chosen": -182.73411560058594, | |
| "logps/rejected": -155.423095703125, | |
| "loss": 7214.4602, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.0006955948774702847, | |
| "rewards/margins": 0.005063413176685572, | |
| "rewards/rejected": -0.0057590072974562645, | |
| "rewards/safe_rewards": -0.0021988481748849154, | |
| "rewards/unsafe_rewards": 0.0001153635821538046, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.8518518518518516e-07, | |
| "logits/chosen": -1.975612998008728, | |
| "logits/rejected": -1.8158948421478271, | |
| "logps/chosen": -186.48574829101562, | |
| "logps/rejected": -168.57896423339844, | |
| "loss": 7816.8766, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.007440758403390646, | |
| "rewards/margins": 0.010602862574160099, | |
| "rewards/rejected": -0.018043622374534607, | |
| "rewards/safe_rewards": -0.010516250506043434, | |
| "rewards/unsafe_rewards": -0.015666166320443153, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.1164021164021162e-07, | |
| "logits/chosen": -1.9063125848770142, | |
| "logits/rejected": -1.7897474765777588, | |
| "logps/chosen": -210.2836151123047, | |
| "logps/rejected": -180.822998046875, | |
| "loss": 7304.9531, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.024481967091560364, | |
| "rewards/margins": 0.016244709491729736, | |
| "rewards/rejected": -0.0407266803085804, | |
| "rewards/safe_rewards": -0.02365388534963131, | |
| "rewards/unsafe_rewards": -0.0289783775806427, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.3809523809523806e-07, | |
| "logits/chosen": -1.994605302810669, | |
| "logits/rejected": -1.866681694984436, | |
| "logps/chosen": -203.6532440185547, | |
| "logps/rejected": -174.1517791748047, | |
| "loss": 7251.9984, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.06749475002288818, | |
| "rewards/margins": 0.020768558606505394, | |
| "rewards/rejected": -0.08826331794261932, | |
| "rewards/safe_rewards": -0.06556878238916397, | |
| "rewards/unsafe_rewards": -0.052192188799381256, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.645502645502645e-07, | |
| "logits/chosen": -1.9495357275009155, | |
| "logits/rejected": -1.8006837368011475, | |
| "logps/chosen": -205.99411010742188, | |
| "logps/rejected": -192.54415893554688, | |
| "loss": 6776.1008, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.11886356770992279, | |
| "rewards/margins": 0.020749244838953018, | |
| "rewards/rejected": -0.1396128088235855, | |
| "rewards/safe_rewards": -0.11704058945178986, | |
| "rewards/unsafe_rewards": -0.1348837912082672, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.9100529100529097e-07, | |
| "logits/chosen": -1.9887052774429321, | |
| "logits/rejected": -1.8671073913574219, | |
| "logps/chosen": -226.98001098632812, | |
| "logps/rejected": -217.73733520507812, | |
| "loss": 6636.9766, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.11880362033843994, | |
| "rewards/margins": 0.03935481607913971, | |
| "rewards/rejected": -0.15815845131874084, | |
| "rewards/safe_rewards": -0.14540424942970276, | |
| "rewards/unsafe_rewards": -0.11240017414093018, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.1746031746031743e-07, | |
| "logits/chosen": -1.8841511011123657, | |
| "logits/rejected": -1.6952005624771118, | |
| "logps/chosen": -235.6121368408203, | |
| "logps/rejected": -192.76162719726562, | |
| "loss": 6804.4828, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.1285235583782196, | |
| "rewards/margins": 0.07450314611196518, | |
| "rewards/rejected": -0.20302672684192657, | |
| "rewards/safe_rewards": -0.12894900143146515, | |
| "rewards/unsafe_rewards": -0.12272067368030548, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.439153439153439e-07, | |
| "logits/chosen": -1.8711330890655518, | |
| "logits/rejected": -1.6887938976287842, | |
| "logps/chosen": -225.3953094482422, | |
| "logps/rejected": -200.31997680664062, | |
| "loss": 7036.6016, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.11849894374608994, | |
| "rewards/margins": 0.05801115185022354, | |
| "rewards/rejected": -0.17651011049747467, | |
| "rewards/safe_rewards": -0.10611984878778458, | |
| "rewards/unsafe_rewards": -0.14429841935634613, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.703703703703703e-07, | |
| "logits/chosen": -1.826206922531128, | |
| "logits/rejected": -1.6439968347549438, | |
| "logps/chosen": -220.1838836669922, | |
| "logps/rejected": -185.7141876220703, | |
| "loss": 6936.9914, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.11376659572124481, | |
| "rewards/margins": 0.0765247792005539, | |
| "rewards/rejected": -0.1902913898229599, | |
| "rewards/safe_rewards": -0.11482509225606918, | |
| "rewards/unsafe_rewards": -0.09925278276205063, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 3.968253968253968e-07, | |
| "logits/chosen": -1.7187334299087524, | |
| "logits/rejected": -1.5741361379623413, | |
| "logps/chosen": -211.09603881835938, | |
| "logps/rejected": -203.66156005859375, | |
| "loss": 6555.6867, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.19104455411434174, | |
| "rewards/margins": 0.06891994178295135, | |
| "rewards/rejected": -0.2599644958972931, | |
| "rewards/safe_rewards": -0.20118245482444763, | |
| "rewards/unsafe_rewards": -0.16981182992458344, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.2328042328042324e-07, | |
| "logits/chosen": -1.7090606689453125, | |
| "logits/rejected": -1.4574247598648071, | |
| "logps/chosen": -231.1162567138672, | |
| "logps/rejected": -197.13832092285156, | |
| "loss": 6483.332, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.2999975085258484, | |
| "rewards/margins": 0.08841492235660553, | |
| "rewards/rejected": -0.3884124159812927, | |
| "rewards/safe_rewards": -0.2963607907295227, | |
| "rewards/unsafe_rewards": -0.2815978527069092, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.497354497354497e-07, | |
| "logits/chosen": -1.7472738027572632, | |
| "logits/rejected": -1.5065333843231201, | |
| "logps/chosen": -255.1507110595703, | |
| "logps/rejected": -221.82241821289062, | |
| "loss": 6801.5375, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.23129959404468536, | |
| "rewards/margins": 0.12043756246566772, | |
| "rewards/rejected": -0.35173720121383667, | |
| "rewards/safe_rewards": -0.22959312796592712, | |
| "rewards/unsafe_rewards": -0.1985938847064972, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.761904761904761e-07, | |
| "logits/chosen": -1.680676817893982, | |
| "logits/rejected": -1.4166452884674072, | |
| "logps/chosen": -216.8690948486328, | |
| "logps/rejected": -191.8008270263672, | |
| "loss": 6535.7055, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.26913732290267944, | |
| "rewards/margins": 0.11233188211917877, | |
| "rewards/rejected": -0.381469190120697, | |
| "rewards/safe_rewards": -0.26176974177360535, | |
| "rewards/unsafe_rewards": -0.23940448462963104, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.999995705919032e-07, | |
| "logits/chosen": -1.5433807373046875, | |
| "logits/rejected": -1.2667306661605835, | |
| "logps/chosen": -224.0026397705078, | |
| "logps/rejected": -205.34414672851562, | |
| "loss": 6409.0121, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.19693121314048767, | |
| "rewards/margins": 0.09455744177103043, | |
| "rewards/rejected": -0.2914886772632599, | |
| "rewards/safe_rewards": -0.17649488151073456, | |
| "rewards/unsafe_rewards": -0.18380855023860931, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.999480434051858e-07, | |
| "logits/chosen": -1.5521910190582275, | |
| "logits/rejected": -1.3097938299179077, | |
| "logps/chosen": -225.257568359375, | |
| "logps/rejected": -205.92129516601562, | |
| "loss": 6576.5188, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.1997550129890442, | |
| "rewards/margins": 0.0904761329293251, | |
| "rewards/rejected": -0.2902311384677887, | |
| "rewards/safe_rewards": -0.20136451721191406, | |
| "rewards/unsafe_rewards": -0.21680407226085663, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.998106548810311e-07, | |
| "logits/chosen": -1.3539698123931885, | |
| "logits/rejected": -1.2038872241973877, | |
| "logps/chosen": -212.8267364501953, | |
| "logps/rejected": -220.0903778076172, | |
| "loss": 6444.5828, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.2437468022108078, | |
| "rewards/margins": 0.14799915254116058, | |
| "rewards/rejected": -0.3917458951473236, | |
| "rewards/safe_rewards": -0.2773512601852417, | |
| "rewards/unsafe_rewards": -0.2216939926147461, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.995874522146975e-07, | |
| "logits/chosen": -1.503328561782837, | |
| "logits/rejected": -1.3146250247955322, | |
| "logps/chosen": -236.4509735107422, | |
| "logps/rejected": -211.6634063720703, | |
| "loss": 6233.5547, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.29747992753982544, | |
| "rewards/margins": 0.13039958477020264, | |
| "rewards/rejected": -0.4278795123100281, | |
| "rewards/safe_rewards": -0.2768808901309967, | |
| "rewards/unsafe_rewards": -0.3182833790779114, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.992785120800375e-07, | |
| "logits/chosen": -1.576887845993042, | |
| "logits/rejected": -1.2664101123809814, | |
| "logps/chosen": -237.9243621826172, | |
| "logps/rejected": -213.4459991455078, | |
| "loss": 6108.0914, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.23068375885486603, | |
| "rewards/margins": 0.14957153797149658, | |
| "rewards/rejected": -0.3802553117275238, | |
| "rewards/safe_rewards": -0.22292426228523254, | |
| "rewards/unsafe_rewards": -0.18162095546722412, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.988839406031596e-07, | |
| "logits/chosen": -1.515092134475708, | |
| "logits/rejected": -1.2886550426483154, | |
| "logps/chosen": -223.7300567626953, | |
| "logps/rejected": -192.06324768066406, | |
| "loss": 6310.6699, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.24790284037590027, | |
| "rewards/margins": 0.1096932515501976, | |
| "rewards/rejected": -0.3575960695743561, | |
| "rewards/safe_rewards": -0.2673969864845276, | |
| "rewards/unsafe_rewards": -0.24145250022411346, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.98403873325972e-07, | |
| "logits/chosen": -1.5146888494491577, | |
| "logits/rejected": -1.3244738578796387, | |
| "logps/chosen": -213.21694946289062, | |
| "logps/rejected": -209.35061645507812, | |
| "loss": 6209.5707, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.2071472406387329, | |
| "rewards/margins": 0.16860046982765198, | |
| "rewards/rejected": -0.3757476806640625, | |
| "rewards/safe_rewards": -0.1998087763786316, | |
| "rewards/unsafe_rewards": -0.20211009681224823, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.978384751596212e-07, | |
| "logits/chosen": -1.3180285692214966, | |
| "logits/rejected": -1.1171799898147583, | |
| "logps/chosen": -232.109375, | |
| "logps/rejected": -236.84072875976562, | |
| "loss": 6328.7531, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.32092350721359253, | |
| "rewards/margins": 0.17156612873077393, | |
| "rewards/rejected": -0.49248963594436646, | |
| "rewards/safe_rewards": -0.4227983355522156, | |
| "rewards/unsafe_rewards": -0.3325851559638977, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.971879403278432e-07, | |
| "logits/chosen": -1.1372450590133667, | |
| "logits/rejected": -0.9446180462837219, | |
| "logps/chosen": -234.88888549804688, | |
| "logps/rejected": -224.05886840820312, | |
| "loss": 6312.1719, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.29563266038894653, | |
| "rewards/margins": 0.12811212241649628, | |
| "rewards/rejected": -0.4237447679042816, | |
| "rewards/safe_rewards": -0.33217892050743103, | |
| "rewards/unsafe_rewards": -0.27307888865470886, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.964524923002436e-07, | |
| "logits/chosen": -1.415801763534546, | |
| "logits/rejected": -1.1731336116790771, | |
| "logps/chosen": -241.7359619140625, | |
| "logps/rejected": -224.5096893310547, | |
| "loss": 5974.0195, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.3053835928440094, | |
| "rewards/margins": 0.16657045483589172, | |
| "rewards/rejected": -0.4719540476799011, | |
| "rewards/safe_rewards": -0.3295218348503113, | |
| "rewards/unsafe_rewards": -0.30390697717666626, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.956323837155325e-07, | |
| "logits/chosen": -1.2966214418411255, | |
| "logits/rejected": -1.1260521411895752, | |
| "logps/chosen": -227.2568359375, | |
| "logps/rejected": -214.1421661376953, | |
| "loss": 6133.0227, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.261239230632782, | |
| "rewards/margins": 0.15825437009334564, | |
| "rewards/rejected": -0.4194936156272888, | |
| "rewards/safe_rewards": -0.2375851422548294, | |
| "rewards/unsafe_rewards": -0.2705303132534027, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.947278962947386e-07, | |
| "logits/chosen": -1.255904197692871, | |
| "logits/rejected": -1.0300556421279907, | |
| "logps/chosen": -231.86593627929688, | |
| "logps/rejected": -213.03768920898438, | |
| "loss": 5684.9316, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.30576351284980774, | |
| "rewards/margins": 0.1560250222682953, | |
| "rewards/rejected": -0.4617885649204254, | |
| "rewards/safe_rewards": -0.3117372691631317, | |
| "rewards/unsafe_rewards": -0.30344492197036743, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.937393407444337e-07, | |
| "logits/chosen": -1.1847805976867676, | |
| "logits/rejected": -0.8935750722885132, | |
| "logps/chosen": -235.5170135498047, | |
| "logps/rejected": -226.17910766601562, | |
| "loss": 5606.7586, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.4436865746974945, | |
| "rewards/margins": 0.12356774508953094, | |
| "rewards/rejected": -0.5672543048858643, | |
| "rewards/safe_rewards": -0.4222384989261627, | |
| "rewards/unsafe_rewards": -0.49501723051071167, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.926670566499992e-07, | |
| "logits/chosen": -0.6831132173538208, | |
| "logits/rejected": -0.43409886956214905, | |
| "logps/chosen": -230.1105499267578, | |
| "logps/rejected": -223.13021850585938, | |
| "loss": 6029.3086, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.4783251881599426, | |
| "rewards/margins": 0.13184307515621185, | |
| "rewards/rejected": -0.6101682782173157, | |
| "rewards/safe_rewards": -0.46370235085487366, | |
| "rewards/unsafe_rewards": -0.4838125705718994, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.915114123589732e-07, | |
| "logits/chosen": -0.5296390652656555, | |
| "logits/rejected": -0.23315271735191345, | |
| "logps/chosen": -264.1290588378906, | |
| "logps/rejected": -222.7255401611328, | |
| "loss": 6587.2148, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.49660125374794006, | |
| "rewards/margins": 0.1269882619380951, | |
| "rewards/rejected": -0.6235895156860352, | |
| "rewards/safe_rewards": -0.5574027299880981, | |
| "rewards/unsafe_rewards": -0.5570284128189087, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.90272804854517e-07, | |
| "logits/chosen": -0.20833459496498108, | |
| "logits/rejected": 0.08662636578083038, | |
| "logps/chosen": -271.68389892578125, | |
| "logps/rejected": -259.1782531738281, | |
| "loss": 6224.5324, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.5533224940299988, | |
| "rewards/margins": 0.15772438049316406, | |
| "rewards/rejected": -0.7110469341278076, | |
| "rewards/safe_rewards": -0.5448375940322876, | |
| "rewards/unsafe_rewards": -0.5393844842910767, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.889516596190448e-07, | |
| "logits/chosen": -0.7373126149177551, | |
| "logits/rejected": -0.34005147218704224, | |
| "logps/chosen": -293.0935363769531, | |
| "logps/rejected": -241.9617156982422, | |
| "loss": 6110.7906, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.5001389980316162, | |
| "rewards/margins": 0.1725221574306488, | |
| "rewards/rejected": -0.6726611852645874, | |
| "rewards/safe_rewards": -0.4835886061191559, | |
| "rewards/unsafe_rewards": -0.5382236838340759, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.875484304880629e-07, | |
| "logits/chosen": -0.8152839541435242, | |
| "logits/rejected": -0.4126107096672058, | |
| "logps/chosen": -302.5885314941406, | |
| "logps/rejected": -256.1798095703125, | |
| "loss": 6488.7234, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.48745980858802795, | |
| "rewards/margins": 0.10641022026538849, | |
| "rewards/rejected": -0.5938700437545776, | |
| "rewards/safe_rewards": -0.449713796377182, | |
| "rewards/unsafe_rewards": -0.48859700560569763, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.860635994942702e-07, | |
| "logits/chosen": -0.47416171431541443, | |
| "logits/rejected": 0.00913926400244236, | |
| "logps/chosen": -258.38189697265625, | |
| "logps/rejected": -230.67880249023438, | |
| "loss": 5790.3816, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.5084312558174133, | |
| "rewards/margins": 0.1444414108991623, | |
| "rewards/rejected": -0.6528726816177368, | |
| "rewards/safe_rewards": -0.5270028114318848, | |
| "rewards/unsafe_rewards": -0.48991069197654724, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.844976767019714e-07, | |
| "logits/chosen": -0.19216355681419373, | |
| "logits/rejected": 0.15172423422336578, | |
| "logps/chosen": -222.911865234375, | |
| "logps/rejected": -202.00888061523438, | |
| "loss": 5908.2133, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.5394010543823242, | |
| "rewards/margins": 0.11715151369571686, | |
| "rewards/rejected": -0.6565525531768799, | |
| "rewards/safe_rewards": -0.5183984041213989, | |
| "rewards/unsafe_rewards": -0.5164821743965149, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.828512000318616e-07, | |
| "logits/chosen": -0.213291734457016, | |
| "logits/rejected": 0.39291974902153015, | |
| "logps/chosen": -303.5594177246094, | |
| "logps/rejected": -259.14178466796875, | |
| "loss": 6109.6039, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.5700324177742004, | |
| "rewards/margins": 0.1927037090063095, | |
| "rewards/rejected": -0.7627362012863159, | |
| "rewards/safe_rewards": -0.5912032723426819, | |
| "rewards/unsafe_rewards": -0.5395609140396118, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.811247350762418e-07, | |
| "logits/chosen": -0.36068278551101685, | |
| "logits/rejected": 0.05598723143339157, | |
| "logps/chosen": -240.6222381591797, | |
| "logps/rejected": -234.20803833007812, | |
| "loss": 5907.1703, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.554689347743988, | |
| "rewards/margins": 0.17352624237537384, | |
| "rewards/rejected": -0.7282156348228455, | |
| "rewards/safe_rewards": -0.5173069834709167, | |
| "rewards/unsafe_rewards": -0.5826700329780579, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.79318874904728e-07, | |
| "logits/chosen": -0.5469863414764404, | |
| "logits/rejected": -0.3919845223426819, | |
| "logps/chosen": -267.99761962890625, | |
| "logps/rejected": -260.9379577636719, | |
| "loss": 6323.5375, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.5513988137245178, | |
| "rewards/margins": 0.16061297059059143, | |
| "rewards/rejected": -0.7120116949081421, | |
| "rewards/safe_rewards": -0.5992297530174255, | |
| "rewards/unsafe_rewards": -0.5494996309280396, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.774342398605221e-07, | |
| "logits/chosen": -1.3936598300933838, | |
| "logits/rejected": -1.0238125324249268, | |
| "logps/chosen": -262.09033203125, | |
| "logps/rejected": -221.07174682617188, | |
| "loss": 5492.8094, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.5323154926300049, | |
| "rewards/margins": 0.15208503603935242, | |
| "rewards/rejected": -0.6844004988670349, | |
| "rewards/safe_rewards": -0.5349102020263672, | |
| "rewards/unsafe_rewards": -0.505738377571106, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.754714773473134e-07, | |
| "logits/chosen": -1.2268015146255493, | |
| "logits/rejected": -1.0391647815704346, | |
| "logps/chosen": -248.2527313232422, | |
| "logps/rejected": -258.4667663574219, | |
| "loss": 6146.5922, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.5346105694770813, | |
| "rewards/margins": 0.18027544021606445, | |
| "rewards/rejected": -0.7148860692977905, | |
| "rewards/safe_rewards": -0.4759598672389984, | |
| "rewards/unsafe_rewards": -0.534007728099823, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.734312616068851e-07, | |
| "logits/chosen": -1.2311909198760986, | |
| "logits/rejected": -0.9865934252738953, | |
| "logps/chosen": -214.25851440429688, | |
| "logps/rejected": -198.68943786621094, | |
| "loss": 5944.2828, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.3851444125175476, | |
| "rewards/margins": 0.0964752659201622, | |
| "rewards/rejected": -0.481619656085968, | |
| "rewards/safe_rewards": -0.40014153718948364, | |
| "rewards/unsafe_rewards": -0.4206266403198242, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.713142934875005e-07, | |
| "logits/chosen": -0.7530995607376099, | |
| "logits/rejected": -0.348047137260437, | |
| "logps/chosen": -273.5533447265625, | |
| "logps/rejected": -247.33377075195312, | |
| "loss": 6019.3629, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.4809795916080475, | |
| "rewards/margins": 0.16457389295101166, | |
| "rewards/rejected": -0.645553469657898, | |
| "rewards/safe_rewards": -0.4939555525779724, | |
| "rewards/unsafe_rewards": -0.51116544008255, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.6912130020314996e-07, | |
| "logits/chosen": 0.18566010892391205, | |
| "logits/rejected": 0.4161214232444763, | |
| "logps/chosen": -233.847900390625, | |
| "logps/rejected": -238.5542755126953, | |
| "loss": 5555.243, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.6200246810913086, | |
| "rewards/margins": 0.13345691561698914, | |
| "rewards/rejected": -0.7534815073013306, | |
| "rewards/safe_rewards": -0.6095362901687622, | |
| "rewards/unsafe_rewards": -0.6309984922409058, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.668530350837408e-07, | |
| "logits/chosen": 0.024336492642760277, | |
| "logits/rejected": 0.4952603876590729, | |
| "logps/chosen": -259.33697509765625, | |
| "logps/rejected": -254.6613006591797, | |
| "loss": 5726.7293, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.5721555948257446, | |
| "rewards/margins": 0.12051858007907867, | |
| "rewards/rejected": -0.6926741600036621, | |
| "rewards/safe_rewards": -0.5316283702850342, | |
| "rewards/unsafe_rewards": -0.5645433664321899, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.64510277316316e-07, | |
| "logits/chosen": -0.0006995767471380532, | |
| "logits/rejected": 0.4036879539489746, | |
| "logps/chosen": -269.50482177734375, | |
| "logps/rejected": -248.73434448242188, | |
| "loss": 6012.2914, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.5171098113059998, | |
| "rewards/margins": 0.20941033959388733, | |
| "rewards/rejected": -0.7265201807022095, | |
| "rewards/safe_rewards": -0.5066377520561218, | |
| "rewards/unsafe_rewards": -0.4963339865207672, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.6209383167739015e-07, | |
| "logits/chosen": -0.8723047971725464, | |
| "logits/rejected": -0.47492194175720215, | |
| "logps/chosen": -239.2227020263672, | |
| "logps/rejected": -223.37191772460938, | |
| "loss": 6090.4563, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.39161261916160583, | |
| "rewards/margins": 0.16117171943187714, | |
| "rewards/rejected": -0.5527843832969666, | |
| "rewards/safe_rewards": -0.4009205400943756, | |
| "rewards/unsafe_rewards": -0.4027668535709381, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.5960452825649526e-07, | |
| "logits/chosen": -0.8613616228103638, | |
| "logits/rejected": -0.5483921766281128, | |
| "logps/chosen": -252.01095581054688, | |
| "logps/rejected": -236.2162628173828, | |
| "loss": 5410.1973, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.4818722605705261, | |
| "rewards/margins": 0.12459783256053925, | |
| "rewards/rejected": -0.606469988822937, | |
| "rewards/safe_rewards": -0.4409845769405365, | |
| "rewards/unsafe_rewards": -0.48863571882247925, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_logits/chosen": -0.00993373803794384, | |
| "eval_logits/rejected": 0.6948209404945374, | |
| "eval_logps/chosen": -205.43228149414062, | |
| "eval_logps/rejected": -177.0600128173828, | |
| "eval_loss": 4657.333984375, | |
| "eval_rewards/accuracies": 0.6367472410202026, | |
| "eval_rewards/chosen": -0.6508274078369141, | |
| "eval_rewards/margins": 0.09844248741865158, | |
| "eval_rewards/rejected": -0.749269962310791, | |
| "eval_rewards/safe_rewards": -0.6381882429122925, | |
| "eval_rewards/unsafe_rewards": -0.6354333162307739, | |
| "eval_runtime": 2355.0926, | |
| "eval_samples_per_second": 14.88, | |
| "eval_steps_per_second": 0.465, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.570432221710314e-07, | |
| "logits/chosen": -0.2417004406452179, | |
| "logits/rejected": 0.17007017135620117, | |
| "logps/chosen": -273.1074523925781, | |
| "logps/rejected": -236.8904266357422, | |
| "loss": 6244.0367, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.5197592973709106, | |
| "rewards/margins": 0.19909226894378662, | |
| "rewards/rejected": -0.7188515067100525, | |
| "rewards/safe_rewards": -0.6001642942428589, | |
| "rewards/unsafe_rewards": -0.5492387413978577, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5441079327251927e-07, | |
| "logits/chosen": -0.3826223909854889, | |
| "logits/rejected": 0.10965192317962646, | |
| "logps/chosen": -261.4352722167969, | |
| "logps/rejected": -251.9311065673828, | |
| "loss": 5649.8195, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.49133262038230896, | |
| "rewards/margins": 0.11736941337585449, | |
| "rewards/rejected": -0.6087020635604858, | |
| "rewards/safe_rewards": -0.4915240406990051, | |
| "rewards/unsafe_rewards": -0.4991859793663025, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5170814584435644e-07, | |
| "logits/chosen": -0.1299566924571991, | |
| "logits/rejected": 0.30430150032043457, | |
| "logps/chosen": -281.5189514160156, | |
| "logps/rejected": -248.9510040283203, | |
| "loss": 6070.9859, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.5048553347587585, | |
| "rewards/margins": 0.17633280158042908, | |
| "rewards/rejected": -0.6811882257461548, | |
| "rewards/safe_rewards": -0.45997923612594604, | |
| "rewards/unsafe_rewards": -0.5042248964309692, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.4893620829118124e-07, | |
| "logits/chosen": 0.41155165433883667, | |
| "logits/rejected": 0.7351133227348328, | |
| "logps/chosen": -218.6739959716797, | |
| "logps/rejected": -222.22238159179688, | |
| "loss": 5773.9555, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.5764225721359253, | |
| "rewards/margins": 0.17755261063575745, | |
| "rewards/rejected": -0.7539752125740051, | |
| "rewards/safe_rewards": -0.5707100033760071, | |
| "rewards/unsafe_rewards": -0.5930426716804504, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.460959328199497e-07, | |
| "logits/chosen": 0.4961000382900238, | |
| "logits/rejected": 0.9081694483757019, | |
| "logps/chosen": -256.54791259765625, | |
| "logps/rejected": -277.130126953125, | |
| "loss": 6108.098, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.6318496465682983, | |
| "rewards/margins": 0.2199208289384842, | |
| "rewards/rejected": -0.8517705202102661, | |
| "rewards/safe_rewards": -0.6448063850402832, | |
| "rewards/unsafe_rewards": -0.5973528623580933, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.4318829511283707e-07, | |
| "logits/chosen": 0.23597554862499237, | |
| "logits/rejected": 0.5608280301094055, | |
| "logps/chosen": -262.15960693359375, | |
| "logps/rejected": -276.5953369140625, | |
| "loss": 6017.0984, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.7231947183609009, | |
| "rewards/margins": 0.16650545597076416, | |
| "rewards/rejected": -0.8897002339363098, | |
| "rewards/safe_rewards": -0.7144005298614502, | |
| "rewards/unsafe_rewards": -0.6883742213249207, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.40214293992074e-07, | |
| "logits/chosen": 0.30961090326309204, | |
| "logits/rejected": 0.6938155889511108, | |
| "logps/chosen": -267.58404541015625, | |
| "logps/rejected": -252.78311157226562, | |
| "loss": 6321.9309, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.5275936722755432, | |
| "rewards/margins": 0.20575468242168427, | |
| "rewards/rejected": -0.7333483099937439, | |
| "rewards/safe_rewards": -0.5182517766952515, | |
| "rewards/unsafe_rewards": -0.5568464994430542, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.3717495107683516e-07, | |
| "logits/chosen": 0.2671489417552948, | |
| "logits/rejected": 0.9092152714729309, | |
| "logps/chosen": -250.55960083007812, | |
| "logps/rejected": -235.89840698242188, | |
| "loss": 5574.8402, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.5318346619606018, | |
| "rewards/margins": 0.18946382403373718, | |
| "rewards/rejected": -0.7212985157966614, | |
| "rewards/safe_rewards": -0.5447245836257935, | |
| "rewards/unsafe_rewards": -0.5725606083869934, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.340713104322953e-07, | |
| "logits/chosen": 0.01171237975358963, | |
| "logits/rejected": 0.4629115164279938, | |
| "logps/chosen": -265.1495056152344, | |
| "logps/rejected": -259.7709045410156, | |
| "loss": 5202.8691, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.5935125946998596, | |
| "rewards/margins": 0.18529286980628967, | |
| "rewards/rejected": -0.7788054347038269, | |
| "rewards/safe_rewards": -0.6250792741775513, | |
| "rewards/unsafe_rewards": -0.6238072514533997, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.3090443821097566e-07, | |
| "logits/chosen": 0.7814422845840454, | |
| "logits/rejected": 1.1566433906555176, | |
| "logps/chosen": -278.1474609375, | |
| "logps/rejected": -280.3294677734375, | |
| "loss": 5335.1562, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6250512599945068, | |
| "rewards/margins": 0.19450877606868744, | |
| "rewards/rejected": -0.8195600509643555, | |
| "rewards/safe_rewards": -0.5736940503120422, | |
| "rewards/unsafe_rewards": -0.6311155557632446, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.276754222865029e-07, | |
| "logits/chosen": 0.546709418296814, | |
| "logits/rejected": 1.5038117170333862, | |
| "logps/chosen": -284.0765075683594, | |
| "logps/rejected": -235.79367065429688, | |
| "loss": 5880.4258, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.6524443626403809, | |
| "rewards/margins": 0.17251375317573547, | |
| "rewards/rejected": -0.8249581456184387, | |
| "rewards/safe_rewards": -0.6402295231819153, | |
| "rewards/unsafe_rewards": -0.6277676224708557, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.2438537187990565e-07, | |
| "logits/chosen": 0.7865768671035767, | |
| "logits/rejected": 1.5061836242675781, | |
| "logps/chosen": -283.3603820800781, | |
| "logps/rejected": -251.56442260742188, | |
| "loss": 5760.8687, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.658532977104187, | |
| "rewards/margins": 0.21655750274658203, | |
| "rewards/rejected": -0.875090479850769, | |
| "rewards/safe_rewards": -0.6327935457229614, | |
| "rewards/unsafe_rewards": -0.6471335291862488, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.210354171785795e-07, | |
| "logits/chosen": 0.2993673086166382, | |
| "logits/rejected": 0.7917363047599792, | |
| "logps/chosen": -272.6424865722656, | |
| "logps/rejected": -247.65853881835938, | |
| "loss": 5872.0883, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.5130705833435059, | |
| "rewards/margins": 0.1547364443540573, | |
| "rewards/rejected": -0.6678069829940796, | |
| "rewards/safe_rewards": -0.5059661269187927, | |
| "rewards/unsafe_rewards": -0.5222837328910828, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.1762670894804775e-07, | |
| "logits/chosen": 0.09364859014749527, | |
| "logits/rejected": 0.5361107587814331, | |
| "logps/chosen": -249.59634399414062, | |
| "logps/rejected": -237.3841094970703, | |
| "loss": 5896.1926, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.49201780557632446, | |
| "rewards/margins": 0.16005203127861023, | |
| "rewards/rejected": -0.6520698070526123, | |
| "rewards/safe_rewards": -0.549709677696228, | |
| "rewards/unsafe_rewards": -0.5637668967247009, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.1416041813665493e-07, | |
| "logits/chosen": -0.5552986860275269, | |
| "logits/rejected": -0.25023895502090454, | |
| "logps/chosen": -253.50790405273438, | |
| "logps/rejected": -253.32583618164062, | |
| "loss": 5920.0328, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.47500887513160706, | |
| "rewards/margins": 0.12813320755958557, | |
| "rewards/rejected": -0.6031420826911926, | |
| "rewards/safe_rewards": -0.43845662474632263, | |
| "rewards/unsafe_rewards": -0.45656904578208923, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.1063773547332584e-07, | |
| "logits/chosen": -0.46418723464012146, | |
| "logits/rejected": -0.049189966171979904, | |
| "logps/chosen": -267.15765380859375, | |
| "logps/rejected": -243.20010375976562, | |
| "loss": 6128.7578, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.6104855537414551, | |
| "rewards/margins": 0.10687772184610367, | |
| "rewards/rejected": -0.7173632383346558, | |
| "rewards/safe_rewards": -0.5476406216621399, | |
| "rewards/unsafe_rewards": -0.603262722492218, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.0705987105853077e-07, | |
| "logits/chosen": -0.2697436213493347, | |
| "logits/rejected": 0.344801664352417, | |
| "logps/chosen": -252.3665313720703, | |
| "logps/rejected": -232.3540496826172, | |
| "loss": 5986.7625, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.5879735350608826, | |
| "rewards/margins": 0.14302758872509003, | |
| "rewards/rejected": -0.731001079082489, | |
| "rewards/safe_rewards": -0.543707013130188, | |
| "rewards/unsafe_rewards": -0.5482696294784546, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.034280539485952e-07, | |
| "logits/chosen": -0.36558887362480164, | |
| "logits/rejected": 0.18461750447750092, | |
| "logps/chosen": -295.22119140625, | |
| "logps/rejected": -274.0675354003906, | |
| "loss": 5383.9453, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.5177947878837585, | |
| "rewards/margins": 0.21047362685203552, | |
| "rewards/rejected": -0.7282685041427612, | |
| "rewards/safe_rewards": -0.5312758684158325, | |
| "rewards/unsafe_rewards": -0.5633383393287659, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.997435317334988e-07, | |
| "logits/chosen": 0.3039137125015259, | |
| "logits/rejected": 0.7977389097213745, | |
| "logps/chosen": -279.23187255859375, | |
| "logps/rejected": -261.033935546875, | |
| "loss": 5720.7707, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.5356379747390747, | |
| "rewards/margins": 0.2088995724916458, | |
| "rewards/rejected": -0.7445374131202698, | |
| "rewards/safe_rewards": -0.5458201169967651, | |
| "rewards/unsafe_rewards": -0.47182130813598633, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.960075701083074e-07, | |
| "logits/chosen": 0.06580640375614166, | |
| "logits/rejected": 0.28118953108787537, | |
| "logps/chosen": -237.80581665039062, | |
| "logps/rejected": -245.47216796875, | |
| "loss": 5702.616, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.5484215021133423, | |
| "rewards/margins": 0.16065733134746552, | |
| "rewards/rejected": -0.709078848361969, | |
| "rewards/safe_rewards": -0.5256644487380981, | |
| "rewards/unsafe_rewards": -0.5779343247413635, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.92221452438385e-07, | |
| "logits/chosen": -0.6886399388313293, | |
| "logits/rejected": -0.33862438797950745, | |
| "logps/chosen": -255.33505249023438, | |
| "logps/rejected": -234.041259765625, | |
| "loss": 5505.9277, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.5285482406616211, | |
| "rewards/margins": 0.18568384647369385, | |
| "rewards/rejected": -0.7142320871353149, | |
| "rewards/safe_rewards": -0.5484398007392883, | |
| "rewards/unsafe_rewards": -0.5874748826026917, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.8838647931853684e-07, | |
| "logits/chosen": -0.7950954437255859, | |
| "logits/rejected": -0.4466307759284973, | |
| "logps/chosen": -253.4489288330078, | |
| "logps/rejected": -254.49813842773438, | |
| "loss": 6030.682, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.5057817697525024, | |
| "rewards/margins": 0.20095935463905334, | |
| "rewards/rejected": -0.7067410945892334, | |
| "rewards/safe_rewards": -0.5353250503540039, | |
| "rewards/unsafe_rewards": -0.4995631277561188, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.845039681262332e-07, | |
| "logits/chosen": -0.5698283910751343, | |
| "logits/rejected": -0.1652621030807495, | |
| "logps/chosen": -265.46368408203125, | |
| "logps/rejected": -250.52951049804688, | |
| "loss": 5514.4148, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.45593494176864624, | |
| "rewards/margins": 0.1759863793849945, | |
| "rewards/rejected": -0.6319212913513184, | |
| "rewards/safe_rewards": -0.4363466799259186, | |
| "rewards/unsafe_rewards": -0.4330349862575531, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.805752525690681e-07, | |
| "logits/chosen": 0.09326216578483582, | |
| "logits/rejected": 0.7224725484848022, | |
| "logps/chosen": -253.9232940673828, | |
| "logps/rejected": -268.0160217285156, | |
| "loss": 5160.3754, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.6526281237602234, | |
| "rewards/margins": 0.22083961963653564, | |
| "rewards/rejected": -0.8734676241874695, | |
| "rewards/safe_rewards": -0.6421413421630859, | |
| "rewards/unsafe_rewards": -0.6364503502845764, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.7660168222660824e-07, | |
| "logits/chosen": 0.43039554357528687, | |
| "logits/rejected": 0.772833526134491, | |
| "logps/chosen": -293.98541259765625, | |
| "logps/rejected": -288.250732421875, | |
| "loss": 5855.4879, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.7387111783027649, | |
| "rewards/margins": 0.16440826654434204, | |
| "rewards/rejected": -0.9031193852424622, | |
| "rewards/safe_rewards": -0.7269446849822998, | |
| "rewards/unsafe_rewards": -0.6723185777664185, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.725846220867901e-07, | |
| "logits/chosen": -0.09916634857654572, | |
| "logits/rejected": 0.4922304153442383, | |
| "logps/chosen": -265.7640686035156, | |
| "logps/rejected": -243.7411346435547, | |
| "loss": 6137.0988, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.6147286295890808, | |
| "rewards/margins": 0.14420659840106964, | |
| "rewards/rejected": -0.7589352130889893, | |
| "rewards/safe_rewards": -0.6549733877182007, | |
| "rewards/unsafe_rewards": -0.6351133584976196, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.6852545207702393e-07, | |
| "logits/chosen": -0.18887875974178314, | |
| "logits/rejected": 0.4651460647583008, | |
| "logps/chosen": -300.3460998535156, | |
| "logps/rejected": -247.0656280517578, | |
| "loss": 5956.6977, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.5610722899436951, | |
| "rewards/margins": 0.18032148480415344, | |
| "rewards/rejected": -0.7413938641548157, | |
| "rewards/safe_rewards": -0.5364476442337036, | |
| "rewards/unsafe_rewards": -0.5671006441116333, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.6442556659016475e-07, | |
| "logits/chosen": 0.3691898286342621, | |
| "logits/rejected": 1.0192655324935913, | |
| "logps/chosen": -278.3470458984375, | |
| "logps/rejected": -240.86141967773438, | |
| "loss": 5414.8289, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.5899799466133118, | |
| "rewards/margins": 0.20228877663612366, | |
| "rewards/rejected": -0.7922687530517578, | |
| "rewards/safe_rewards": -0.5520480871200562, | |
| "rewards/unsafe_rewards": -0.5946981906890869, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.602863740055161e-07, | |
| "logits/chosen": 1.002415418624878, | |
| "logits/rejected": 1.6322085857391357, | |
| "logps/chosen": -268.44488525390625, | |
| "logps/rejected": -261.2592468261719, | |
| "loss": 5358.4598, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.6824139356613159, | |
| "rewards/margins": 0.22263555228710175, | |
| "rewards/rejected": -0.9050495028495789, | |
| "rewards/safe_rewards": -0.6642250418663025, | |
| "rewards/unsafe_rewards": -0.6494946479797363, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.5610929620502747e-07, | |
| "logits/chosen": 0.9502559900283813, | |
| "logits/rejected": 1.4719197750091553, | |
| "logps/chosen": -271.93231201171875, | |
| "logps/rejected": -281.78125, | |
| "loss": 5792.9727, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.7460067272186279, | |
| "rewards/margins": 0.18493010103702545, | |
| "rewards/rejected": -0.9309368133544922, | |
| "rewards/safe_rewards": -0.7411947846412659, | |
| "rewards/unsafe_rewards": -0.8093317151069641, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.5189576808485404e-07, | |
| "logits/chosen": 0.7791315913200378, | |
| "logits/rejected": 1.4415690898895264, | |
| "logps/chosen": -300.54150390625, | |
| "logps/rejected": -273.402587890625, | |
| "loss": 5584.2125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.7409987449645996, | |
| "rewards/margins": 0.20648033916950226, | |
| "rewards/rejected": -0.9474791288375854, | |
| "rewards/safe_rewards": -0.726071834564209, | |
| "rewards/unsafe_rewards": -0.8359003067016602, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.476472370624464e-07, | |
| "logits/chosen": 0.40392106771469116, | |
| "logits/rejected": 0.7413457632064819, | |
| "logps/chosen": -254.9908905029297, | |
| "logps/rejected": -251.4073028564453, | |
| "loss": 6101.9039, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.6420382261276245, | |
| "rewards/margins": 0.13990595936775208, | |
| "rewards/rejected": -0.7819441556930542, | |
| "rewards/safe_rewards": -0.5959726572036743, | |
| "rewards/unsafe_rewards": -0.6521440744400024, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.43365162579338e-07, | |
| "logits/chosen": 0.11586692184209824, | |
| "logits/rejected": 0.49579864740371704, | |
| "logps/chosen": -226.8084716796875, | |
| "logps/rejected": -232.3746337890625, | |
| "loss": 5837.0383, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.551177442073822, | |
| "rewards/margins": 0.19108565151691437, | |
| "rewards/rejected": -0.7422630190849304, | |
| "rewards/safe_rewards": -0.5533746480941772, | |
| "rewards/unsafe_rewards": -0.5072416663169861, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.390510155998023e-07, | |
| "logits/chosen": 0.24915654957294464, | |
| "logits/rejected": 0.6536698341369629, | |
| "logps/chosen": -277.9824523925781, | |
| "logps/rejected": -249.2000732421875, | |
| "loss": 5721.2586, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.648623526096344, | |
| "rewards/margins": 0.12514245510101318, | |
| "rewards/rejected": -0.7737659811973572, | |
| "rewards/safe_rewards": -0.7092838287353516, | |
| "rewards/unsafe_rewards": -0.6900613903999329, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.347062781055526e-07, | |
| "logits/chosen": 0.5860965847969055, | |
| "logits/rejected": 0.9803635478019714, | |
| "logps/chosen": -245.1415252685547, | |
| "logps/rejected": -272.01080322265625, | |
| "loss": 5834.2676, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.6521397829055786, | |
| "rewards/margins": 0.21285566687583923, | |
| "rewards/rejected": -0.8649954795837402, | |
| "rewards/safe_rewards": -0.6472452878952026, | |
| "rewards/unsafe_rewards": -0.6902757883071899, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.303324425866559e-07, | |
| "logits/chosen": 0.6316410303115845, | |
| "logits/rejected": 0.902866005897522, | |
| "logps/chosen": -291.68597412109375, | |
| "logps/rejected": -266.18585205078125, | |
| "loss": 5964.1836, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.6263974905014038, | |
| "rewards/margins": 0.17340168356895447, | |
| "rewards/rejected": -0.7997991442680359, | |
| "rewards/safe_rewards": -0.6621179580688477, | |
| "rewards/unsafe_rewards": -0.6091993451118469, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.2593101152883795e-07, | |
| "logits/chosen": 0.6831669211387634, | |
| "logits/rejected": 0.9902046918869019, | |
| "logps/chosen": -256.2884521484375, | |
| "logps/rejected": -279.5752868652344, | |
| "loss": 5961.9836, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.6823039054870605, | |
| "rewards/margins": 0.17010322213172913, | |
| "rewards/rejected": -0.8524071574211121, | |
| "rewards/safe_rewards": -0.6452068090438843, | |
| "rewards/unsafe_rewards": -0.7062270641326904, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 3.21503496897354e-07, | |
| "logits/chosen": 0.48068660497665405, | |
| "logits/rejected": 0.952492892742157, | |
| "logps/chosen": -289.909423828125, | |
| "logps/rejected": -262.1679992675781, | |
| "loss": 6021.2465, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.7239787578582764, | |
| "rewards/margins": 0.12146921455860138, | |
| "rewards/rejected": -0.8454478979110718, | |
| "rewards/safe_rewards": -0.7816897630691528, | |
| "rewards/unsafe_rewards": -0.7392334938049316, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 3.170514196176037e-07, | |
| "logits/chosen": 0.28930729627609253, | |
| "logits/rejected": 0.6634337902069092, | |
| "logps/chosen": -267.9020080566406, | |
| "logps/rejected": -267.813720703125, | |
| "loss": 5325.9504, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.6826976537704468, | |
| "rewards/margins": 0.18379981815814972, | |
| "rewards/rejected": -0.8664973974227905, | |
| "rewards/safe_rewards": -0.6970924139022827, | |
| "rewards/unsafe_rewards": -0.6835001111030579, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.125763090526674e-07, | |
| "logits/chosen": 0.21367737650871277, | |
| "logits/rejected": 0.6621453166007996, | |
| "logps/chosen": -278.2737731933594, | |
| "logps/rejected": -269.89404296875, | |
| "loss": 5261.0746, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.6765376329421997, | |
| "rewards/margins": 0.20078134536743164, | |
| "rewards/rejected": -0.8773189783096313, | |
| "rewards/safe_rewards": -0.6867783665657043, | |
| "rewards/unsafe_rewards": -0.6920818090438843, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.080797024779447e-07, | |
| "logits/chosen": 0.19137686491012573, | |
| "logits/rejected": 0.7889005541801453, | |
| "logps/chosen": -253.41421508789062, | |
| "logps/rejected": -236.6729278564453, | |
| "loss": 5719.0418, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.6732780933380127, | |
| "rewards/margins": 0.19284026324748993, | |
| "rewards/rejected": -0.866118311882019, | |
| "rewards/safe_rewards": -0.7765754461288452, | |
| "rewards/unsafe_rewards": -0.682191014289856, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 3.035631445530743e-07, | |
| "logits/chosen": 0.4879905581474304, | |
| "logits/rejected": 0.9158290028572083, | |
| "logps/chosen": -290.2519226074219, | |
| "logps/rejected": -284.17071533203125, | |
| "loss": 5561.2797, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.7149994969367981, | |
| "rewards/margins": 0.19377604126930237, | |
| "rewards/rejected": -0.9087755084037781, | |
| "rewards/safe_rewards": -0.6696754693984985, | |
| "rewards/unsafe_rewards": -0.6708149313926697, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.9902818679131775e-07, | |
| "logits/chosen": 0.3951093852519989, | |
| "logits/rejected": 0.8302197456359863, | |
| "logps/chosen": -271.294189453125, | |
| "logps/rejected": -253.5810546875, | |
| "loss": 5419.4855, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.7780183553695679, | |
| "rewards/margins": 0.17024961113929749, | |
| "rewards/rejected": -0.9482680559158325, | |
| "rewards/safe_rewards": -0.7877544164657593, | |
| "rewards/unsafe_rewards": -0.7789348363876343, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.944763870265886e-07, | |
| "logits/chosen": -0.13839875161647797, | |
| "logits/rejected": 0.3581174314022064, | |
| "logps/chosen": -272.4313659667969, | |
| "logps/rejected": -267.915771484375, | |
| "loss": 5453.8977, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6422435641288757, | |
| "rewards/margins": 0.19745132327079773, | |
| "rewards/rejected": -0.8396948575973511, | |
| "rewards/safe_rewards": -0.6758723258972168, | |
| "rewards/unsafe_rewards": -0.578320324420929, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.899093088783105e-07, | |
| "logits/chosen": -0.06241287663578987, | |
| "logits/rejected": 0.4015175700187683, | |
| "logps/chosen": -294.8834533691406, | |
| "logps/rejected": -279.0429382324219, | |
| "loss": 5278.1754, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.6345726847648621, | |
| "rewards/margins": 0.14065605401992798, | |
| "rewards/rejected": -0.7752287983894348, | |
| "rewards/safe_rewards": -0.6587311029434204, | |
| "rewards/unsafe_rewards": -0.6476761102676392, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.8532852121428733e-07, | |
| "logits/chosen": -0.04936225712299347, | |
| "logits/rejected": 0.38959282636642456, | |
| "logps/chosen": -248.14639282226562, | |
| "logps/rejected": -235.8994598388672, | |
| "loss": 5653.668, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.5577735304832458, | |
| "rewards/margins": 0.21775202453136444, | |
| "rewards/rejected": -0.7755255699157715, | |
| "rewards/safe_rewards": -0.55736243724823, | |
| "rewards/unsafe_rewards": -0.5908164978027344, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.807355976117716e-07, | |
| "logits/chosen": 0.11599000543355942, | |
| "logits/rejected": 0.49212461709976196, | |
| "logps/chosen": -284.78472900390625, | |
| "logps/rejected": -265.7978515625, | |
| "loss": 5924.3578, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.5290887355804443, | |
| "rewards/margins": 0.22062186896800995, | |
| "rewards/rejected": -0.7497105598449707, | |
| "rewards/safe_rewards": -0.4509585499763489, | |
| "rewards/unsafe_rewards": -0.5535848736763, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.761321158169134e-07, | |
| "logits/chosen": -0.0665382593870163, | |
| "logits/rejected": 0.4467547535896301, | |
| "logps/chosen": -262.4479064941406, | |
| "logps/rejected": -265.8846740722656, | |
| "loss": 5391.7484, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.604932427406311, | |
| "rewards/margins": 0.16624750196933746, | |
| "rewards/rejected": -0.7711800336837769, | |
| "rewards/safe_rewards": -0.570032000541687, | |
| "rewards/unsafe_rewards": -0.6088122129440308, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.715196572027789e-07, | |
| "logits/chosen": 0.15862391889095306, | |
| "logits/rejected": 0.511070966720581, | |
| "logps/chosen": -252.94137573242188, | |
| "logps/rejected": -255.08187866210938, | |
| "loss": 5628.2164, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.6333836913108826, | |
| "rewards/margins": 0.20889365673065186, | |
| "rewards/rejected": -0.8422773480415344, | |
| "rewards/safe_rewards": -0.6369217038154602, | |
| "rewards/unsafe_rewards": -0.6703649163246155, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.6689980622612204e-07, | |
| "logits/chosen": 0.08565627038478851, | |
| "logits/rejected": 0.5222666263580322, | |
| "logps/chosen": -255.2662811279297, | |
| "logps/rejected": -253.49105834960938, | |
| "loss": 5634.6316, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.6020347476005554, | |
| "rewards/margins": 0.19342327117919922, | |
| "rewards/rejected": -0.7954580187797546, | |
| "rewards/safe_rewards": -0.6501786708831787, | |
| "rewards/unsafe_rewards": -0.6461445093154907, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_logits/chosen": 0.41202229261398315, | |
| "eval_logits/rejected": 1.1542474031448364, | |
| "eval_logps/chosen": -220.34913635253906, | |
| "eval_logps/rejected": -189.61671447753906, | |
| "eval_loss": 4507.89453125, | |
| "eval_rewards/accuracies": 0.6151915788650513, | |
| "eval_rewards/chosen": -0.799996018409729, | |
| "eval_rewards/margins": 0.07484080642461777, | |
| "eval_rewards/rejected": -0.874836802482605, | |
| "eval_rewards/safe_rewards": -0.7885684370994568, | |
| "eval_rewards/unsafe_rewards": -0.784635066986084, | |
| "eval_runtime": 2353.482, | |
| "eval_samples_per_second": 14.89, | |
| "eval_steps_per_second": 0.466, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.622741498830969e-07, | |
| "logits/chosen": 0.2431926727294922, | |
| "logits/rejected": 0.40795207023620605, | |
| "logps/chosen": -279.1517333984375, | |
| "logps/rejected": -271.7449645996094, | |
| "loss": 5872.2367, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.6438090801239014, | |
| "rewards/margins": 0.17429831624031067, | |
| "rewards/rejected": -0.8181073069572449, | |
| "rewards/safe_rewards": -0.6910767555236816, | |
| "rewards/unsafe_rewards": -0.6460915803909302, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.5764427716409815e-07, | |
| "logits/chosen": -0.09687475860118866, | |
| "logits/rejected": 0.4301505982875824, | |
| "logps/chosen": -272.0554504394531, | |
| "logps/rejected": -255.6719207763672, | |
| "loss": 5816.6723, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.5806029438972473, | |
| "rewards/margins": 0.19818606972694397, | |
| "rewards/rejected": -0.7787889838218689, | |
| "rewards/safe_rewards": -0.5169692635536194, | |
| "rewards/unsafe_rewards": -0.5289751291275024, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.5301177850791616e-07, | |
| "logits/chosen": 0.01663217321038246, | |
| "logits/rejected": 0.6527854204177856, | |
| "logps/chosen": -290.3711853027344, | |
| "logps/rejected": -268.1048278808594, | |
| "loss": 5912.7102, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.6295832395553589, | |
| "rewards/margins": 0.20760869979858398, | |
| "rewards/rejected": -0.8371919393539429, | |
| "rewards/safe_rewards": -0.642471432685852, | |
| "rewards/unsafe_rewards": -0.6146708726882935, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.4837824525539477e-07, | |
| "logits/chosen": 0.17375509440898895, | |
| "logits/rejected": 0.7390264272689819, | |
| "logps/chosen": -270.261474609375, | |
| "logps/rejected": -261.2465515136719, | |
| "loss": 5659.6238, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.6727645993232727, | |
| "rewards/margins": 0.17281220853328705, | |
| "rewards/rejected": -0.8455768823623657, | |
| "rewards/safe_rewards": -0.6424635052680969, | |
| "rewards/unsafe_rewards": -0.6337414979934692, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.4374526910277886e-07, | |
| "logits/chosen": 0.13272862136363983, | |
| "logits/rejected": 0.57741779088974, | |
| "logps/chosen": -270.9297790527344, | |
| "logps/rejected": -267.14471435546875, | |
| "loss": 5861.1039, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.6448026895523071, | |
| "rewards/margins": 0.2006601095199585, | |
| "rewards/rejected": -0.8454626798629761, | |
| "rewards/safe_rewards": -0.6065593361854553, | |
| "rewards/unsafe_rewards": -0.6479047536849976, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.391144415549403e-07, | |
| "logits/chosen": 0.2520432770252228, | |
| "logits/rejected": 0.7386651039123535, | |
| "logps/chosen": -256.0111389160156, | |
| "logps/rejected": -244.1455535888672, | |
| "loss": 5928.0605, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.6962358355522156, | |
| "rewards/margins": 0.125870481133461, | |
| "rewards/rejected": -0.8221063613891602, | |
| "rewards/safe_rewards": -0.6803200244903564, | |
| "rewards/unsafe_rewards": -0.6994472742080688, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.3448735337866919e-07, | |
| "logits/chosen": 0.26303520798683167, | |
| "logits/rejected": 0.7426208257675171, | |
| "logps/chosen": -247.3863983154297, | |
| "logps/rejected": -244.02392578125, | |
| "loss": 5880.1039, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.6635211706161499, | |
| "rewards/margins": 0.15260052680969238, | |
| "rewards/rejected": -0.8161218762397766, | |
| "rewards/safe_rewards": -0.706309974193573, | |
| "rewards/unsafe_rewards": -0.6638337969779968, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.2986559405621886e-07, | |
| "logits/chosen": 0.030937856063246727, | |
| "logits/rejected": 0.47169026732444763, | |
| "logps/chosen": -279.0972595214844, | |
| "logps/rejected": -268.9930725097656, | |
| "loss": 5616.6, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.6163111925125122, | |
| "rewards/margins": 0.16996563971042633, | |
| "rewards/rejected": -0.7862768173217773, | |
| "rewards/safe_rewards": -0.6654713749885559, | |
| "rewards/unsafe_rewards": -0.6399198770523071, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.2525075123929213e-07, | |
| "logits/chosen": 0.43386760354042053, | |
| "logits/rejected": 0.7538164258003235, | |
| "logps/chosen": -267.44134521484375, | |
| "logps/rejected": -258.99249267578125, | |
| "loss": 5716.7879, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.6649960279464722, | |
| "rewards/margins": 0.22522863745689392, | |
| "rewards/rejected": -0.890224814414978, | |
| "rewards/safe_rewards": -0.6375536322593689, | |
| "rewards/unsafe_rewards": -0.6348733901977539, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.206444102036565e-07, | |
| "logits/chosen": 0.6684126257896423, | |
| "logits/rejected": 0.9879862666130066, | |
| "logps/chosen": -267.1449279785156, | |
| "logps/rejected": -270.4283752441406, | |
| "loss": 5974.3918, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.74274742603302, | |
| "rewards/margins": 0.15645694732666016, | |
| "rewards/rejected": -0.899204432964325, | |
| "rewards/safe_rewards": -0.7267962694168091, | |
| "rewards/unsafe_rewards": -0.6818505525588989, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.160481533045751e-07, | |
| "logits/chosen": 0.4061971604824066, | |
| "logits/rejected": 0.9739459753036499, | |
| "logps/chosen": -285.2103271484375, | |
| "logps/rejected": -266.5544128417969, | |
| "loss": 5749.7781, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.7457272410392761, | |
| "rewards/margins": 0.2004440277814865, | |
| "rewards/rejected": -0.9461711645126343, | |
| "rewards/safe_rewards": -0.7860220670700073, | |
| "rewards/unsafe_rewards": -0.7390663623809814, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.1146355943324148e-07, | |
| "logits/chosen": 0.48321422934532166, | |
| "logits/rejected": 0.9058516621589661, | |
| "logps/chosen": -271.53924560546875, | |
| "logps/rejected": -259.0006103515625, | |
| "loss": 5805.548, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.7600331902503967, | |
| "rewards/margins": 0.13751891255378723, | |
| "rewards/rejected": -0.8975521326065063, | |
| "rewards/safe_rewards": -0.7516414523124695, | |
| "rewards/unsafe_rewards": -0.7484757304191589, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0689220347440374e-07, | |
| "logits/chosen": 0.1501261442899704, | |
| "logits/rejected": 0.688166618347168, | |
| "logps/chosen": -301.4822082519531, | |
| "logps/rejected": -273.8033447265625, | |
| "loss": 5622.9852, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.6868051290512085, | |
| "rewards/margins": 0.17512689530849457, | |
| "rewards/rejected": -0.8619319796562195, | |
| "rewards/safe_rewards": -0.6461024284362793, | |
| "rewards/unsafe_rewards": -0.6649470329284668, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0233565576536564e-07, | |
| "logits/chosen": 0.05991173908114433, | |
| "logits/rejected": 0.42331352829933167, | |
| "logps/chosen": -294.298095703125, | |
| "logps/rejected": -287.5555419921875, | |
| "loss": 5822.3992, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.7161829471588135, | |
| "rewards/margins": 0.13876894116401672, | |
| "rewards/rejected": -0.8549518585205078, | |
| "rewards/safe_rewards": -0.7057495713233948, | |
| "rewards/unsafe_rewards": -0.6698770523071289, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.97795481556549e-07, | |
| "logits/chosen": -0.03588150069117546, | |
| "logits/rejected": 0.400505006313324, | |
| "logps/chosen": -277.2012023925781, | |
| "logps/rejected": -247.14804077148438, | |
| "loss": 5935.0914, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.6964778304100037, | |
| "rewards/margins": 0.17653243243694305, | |
| "rewards/rejected": -0.8730102777481079, | |
| "rewards/safe_rewards": -0.6869702339172363, | |
| "rewards/unsafe_rewards": -0.6601093411445618, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9327324047380422e-07, | |
| "logits/chosen": -0.08701475709676743, | |
| "logits/rejected": 0.4873865246772766, | |
| "logps/chosen": -263.2158203125, | |
| "logps/rejected": -258.84039306640625, | |
| "loss": 5564.0863, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.6252955198287964, | |
| "rewards/margins": 0.22415871918201447, | |
| "rewards/rejected": -0.8494542241096497, | |
| "rewards/safe_rewards": -0.6420432329177856, | |
| "rewards/unsafe_rewards": -0.6124902963638306, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.887704859826528e-07, | |
| "logits/chosen": 0.07522957026958466, | |
| "logits/rejected": 0.3329767882823944, | |
| "logps/chosen": -285.8026123046875, | |
| "logps/rejected": -266.8732604980469, | |
| "loss": 5750.982, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.6510334014892578, | |
| "rewards/margins": 0.10930682718753815, | |
| "rewards/rejected": -0.7603402137756348, | |
| "rewards/safe_rewards": -0.6223952174186707, | |
| "rewards/unsafe_rewards": -0.6682702302932739, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8428876485464572e-07, | |
| "logits/chosen": -0.15613001585006714, | |
| "logits/rejected": 0.41360145807266235, | |
| "logps/chosen": -238.16897583007812, | |
| "logps/rejected": -225.97802734375, | |
| "loss": 5979.2156, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.5804222822189331, | |
| "rewards/margins": 0.1743427962064743, | |
| "rewards/rejected": -0.7547650933265686, | |
| "rewards/safe_rewards": -0.5962327718734741, | |
| "rewards/unsafe_rewards": -0.6777797341346741, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.798296166360216e-07, | |
| "logits/chosen": -0.029682714492082596, | |
| "logits/rejected": 0.5113533139228821, | |
| "logps/chosen": -290.142822265625, | |
| "logps/rejected": -269.4226989746094, | |
| "loss": 6057.1922, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6241404414176941, | |
| "rewards/margins": 0.1994599997997284, | |
| "rewards/rejected": -0.8236004114151001, | |
| "rewards/safe_rewards": -0.6254442930221558, | |
| "rewards/unsafe_rewards": -0.6271675229072571, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7539457311884675e-07, | |
| "logits/chosen": 0.1500866711139679, | |
| "logits/rejected": 0.5680428743362427, | |
| "logps/chosen": -262.3311462402344, | |
| "logps/rejected": -251.67489624023438, | |
| "loss": 5421.8398, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.6509288549423218, | |
| "rewards/margins": 0.2198909968137741, | |
| "rewards/rejected": -0.8708198666572571, | |
| "rewards/safe_rewards": -0.6651867032051086, | |
| "rewards/unsafe_rewards": -0.6189877390861511, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7098515781481883e-07, | |
| "logits/chosen": 0.4903317987918854, | |
| "logits/rejected": 0.883372962474823, | |
| "logps/chosen": -272.56097412109375, | |
| "logps/rejected": -241.92919921875, | |
| "loss": 5678.3117, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.6993108987808228, | |
| "rewards/margins": 0.11801446974277496, | |
| "rewards/rejected": -0.8173252940177917, | |
| "rewards/safe_rewards": -0.6638237237930298, | |
| "rewards/unsafe_rewards": -0.6766722202301025, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.6660288543191568e-07, | |
| "logits/chosen": 0.20008230209350586, | |
| "logits/rejected": 1.072401523590088, | |
| "logps/chosen": -292.7231140136719, | |
| "logps/rejected": -264.1849365234375, | |
| "loss": 5411.0453, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.6634177565574646, | |
| "rewards/margins": 0.19502988457679749, | |
| "rewards/rejected": -0.8584476709365845, | |
| "rewards/safe_rewards": -0.7102524638175964, | |
| "rewards/unsafe_rewards": -0.6833497285842896, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.6224926135406693e-07, | |
| "logits/chosen": 0.4110666811466217, | |
| "logits/rejected": 0.9241645932197571, | |
| "logps/chosen": -291.5517272949219, | |
| "logps/rejected": -268.79437255859375, | |
| "loss": 5535.6395, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.6780111193656921, | |
| "rewards/margins": 0.2115507870912552, | |
| "rewards/rejected": -0.8895619511604309, | |
| "rewards/safe_rewards": -0.6748231053352356, | |
| "rewards/unsafe_rewards": -0.7003692984580994, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.579257811240298e-07, | |
| "logits/chosen": 0.17879924178123474, | |
| "logits/rejected": 0.82609623670578, | |
| "logps/chosen": -283.47686767578125, | |
| "logps/rejected": -269.6540832519531, | |
| "loss": 5427.3156, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.7036404609680176, | |
| "rewards/margins": 0.14344856142997742, | |
| "rewards/rejected": -0.8470889925956726, | |
| "rewards/safe_rewards": -0.6846009492874146, | |
| "rewards/unsafe_rewards": -0.6783186197280884, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.5363392992964523e-07, | |
| "logits/chosen": 0.4139084815979004, | |
| "logits/rejected": 0.7215920686721802, | |
| "logps/chosen": -257.33319091796875, | |
| "logps/rejected": -258.1666564941406, | |
| "loss": 5595.8969, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.7196224927902222, | |
| "rewards/margins": 0.11075691878795624, | |
| "rewards/rejected": -0.8303793668746948, | |
| "rewards/safe_rewards": -0.7594167590141296, | |
| "rewards/unsafe_rewards": -0.7032173275947571, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.4937518209365108e-07, | |
| "logits/chosen": 0.2804068922996521, | |
| "logits/rejected": 0.7492934465408325, | |
| "logps/chosen": -299.9917297363281, | |
| "logps/rejected": -274.86566162109375, | |
| "loss": 5485.5156, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.6413429975509644, | |
| "rewards/margins": 0.18771231174468994, | |
| "rewards/rejected": -0.8290553092956543, | |
| "rewards/safe_rewards": -0.6320935487747192, | |
| "rewards/unsafe_rewards": -0.6288415789604187, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.4515100056722708e-07, | |
| "logits/chosen": 0.49235549569129944, | |
| "logits/rejected": 0.896806538105011, | |
| "logps/chosen": -250.7898712158203, | |
| "logps/rejected": -248.735107421875, | |
| "loss": 5635.8461, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.6494947671890259, | |
| "rewards/margins": 0.2068520337343216, | |
| "rewards/rejected": -0.8563467860221863, | |
| "rewards/safe_rewards": -0.6947168707847595, | |
| "rewards/unsafe_rewards": -0.6628744602203369, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.4096283642744716e-07, | |
| "logits/chosen": 0.564648449420929, | |
| "logits/rejected": 1.1666864156723022, | |
| "logps/chosen": -287.2496337890625, | |
| "logps/rejected": -269.12689208984375, | |
| "loss": 5744.0652, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.6512799263000488, | |
| "rewards/margins": 0.23767797648906708, | |
| "rewards/rejected": -0.8889577984809875, | |
| "rewards/safe_rewards": -0.6507743000984192, | |
| "rewards/unsafe_rewards": -0.6260145306587219, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.3681212837880977e-07, | |
| "logits/chosen": 0.3310979902744293, | |
| "logits/rejected": 0.946731686592102, | |
| "logps/chosen": -283.14178466796875, | |
| "logps/rejected": -268.6293029785156, | |
| "loss": 5538.1773, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.6541503667831421, | |
| "rewards/margins": 0.20235121250152588, | |
| "rewards/rejected": -0.856501579284668, | |
| "rewards/safe_rewards": -0.7126244902610779, | |
| "rewards/unsafe_rewards": -0.6116858124732971, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.3270030225901908e-07, | |
| "logits/chosen": 0.21446232497692108, | |
| "logits/rejected": 0.9988247156143188, | |
| "logps/chosen": -311.952392578125, | |
| "logps/rejected": -264.99005126953125, | |
| "loss": 5863.9875, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.6609299778938293, | |
| "rewards/margins": 0.20790867507457733, | |
| "rewards/rejected": -0.8688386678695679, | |
| "rewards/safe_rewards": -0.6820018291473389, | |
| "rewards/unsafe_rewards": -0.6768487691879272, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.2862877054918572e-07, | |
| "logits/chosen": 0.43877673149108887, | |
| "logits/rejected": 0.7122836112976074, | |
| "logps/chosen": -263.78924560546875, | |
| "logps/rejected": -267.306884765625, | |
| "loss": 5915.4555, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.6279779672622681, | |
| "rewards/margins": 0.19203224778175354, | |
| "rewards/rejected": -0.8200103044509888, | |
| "rewards/safe_rewards": -0.5540001392364502, | |
| "rewards/unsafe_rewards": -0.6103017926216125, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.2459893188861613e-07, | |
| "logits/chosen": 0.11050845682621002, | |
| "logits/rejected": 0.638201117515564, | |
| "logps/chosen": -230.92892456054688, | |
| "logps/rejected": -223.246826171875, | |
| "loss": 5522.6379, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.5677499771118164, | |
| "rewards/margins": 0.1929033249616623, | |
| "rewards/rejected": -0.7606532573699951, | |
| "rewards/safe_rewards": -0.6029695272445679, | |
| "rewards/unsafe_rewards": -0.6227617859840393, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.206121705943558e-07, | |
| "logits/chosen": 0.2380530834197998, | |
| "logits/rejected": 0.772462785243988, | |
| "logps/chosen": -265.9678039550781, | |
| "logps/rejected": -236.330078125, | |
| "loss": 5444.8687, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.5695582628250122, | |
| "rewards/margins": 0.17861400544643402, | |
| "rewards/rejected": -0.7481723427772522, | |
| "rewards/safe_rewards": -0.4967488646507263, | |
| "rewards/unsafe_rewards": -0.5609390139579773, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.1666985618565422e-07, | |
| "logits/chosen": 0.7791303396224976, | |
| "logits/rejected": 1.0070080757141113, | |
| "logps/chosen": -239.6016082763672, | |
| "logps/rejected": -250.1675567626953, | |
| "loss": 5496.5402, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.643204391002655, | |
| "rewards/margins": 0.212922140955925, | |
| "rewards/rejected": -0.856126606464386, | |
| "rewards/safe_rewards": -0.6307708024978638, | |
| "rewards/unsafe_rewards": -0.6205247044563293, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.1277334291351145e-07, | |
| "logits/chosen": 0.6811083555221558, | |
| "logits/rejected": 1.2308669090270996, | |
| "logps/chosen": -240.9481964111328, | |
| "logps/rejected": -251.2366485595703, | |
| "loss": 5451.2172, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.6521676778793335, | |
| "rewards/margins": 0.1860547959804535, | |
| "rewards/rejected": -0.8382223844528198, | |
| "rewards/safe_rewards": -0.7259255647659302, | |
| "rewards/unsafe_rewards": -0.6219838857650757, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.089239692954701e-07, | |
| "logits/chosen": 0.36615195870399475, | |
| "logits/rejected": 0.9472381472587585, | |
| "logps/chosen": -269.5465087890625, | |
| "logps/rejected": -256.1499328613281, | |
| "loss": 5717.6105, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.6657227873802185, | |
| "rewards/margins": 0.15908706188201904, | |
| "rewards/rejected": -0.8248098492622375, | |
| "rewards/safe_rewards": -0.7341758012771606, | |
| "rewards/unsafe_rewards": -0.6227680444717407, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.051230576558127e-07, | |
| "logits/chosen": 0.7043350338935852, | |
| "logits/rejected": 1.012446641921997, | |
| "logps/chosen": -265.9175720214844, | |
| "logps/rejected": -296.2731628417969, | |
| "loss": 5307.2445, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.7264591455459595, | |
| "rewards/margins": 0.1706809252500534, | |
| "rewards/rejected": -0.8971401453018188, | |
| "rewards/safe_rewards": -0.7796869277954102, | |
| "rewards/unsafe_rewards": -0.7442405819892883, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.0137191367132078e-07, | |
| "logits/chosen": 0.5799378156661987, | |
| "logits/rejected": 1.0962615013122559, | |
| "logps/chosen": -280.27587890625, | |
| "logps/rejected": -261.3016052246094, | |
| "loss": 5462.4613, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.659958004951477, | |
| "rewards/margins": 0.24963033199310303, | |
| "rewards/rejected": -0.9095882177352905, | |
| "rewards/safe_rewards": -0.6955925226211548, | |
| "rewards/unsafe_rewards": -0.6324699521064758, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 9.76718259227532e-08, | |
| "logits/chosen": 0.498538076877594, | |
| "logits/rejected": 0.9989287257194519, | |
| "logps/chosen": -272.96820068359375, | |
| "logps/rejected": -256.63140869140625, | |
| "loss": 5331.4734, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6205289363861084, | |
| "rewards/margins": 0.21373698115348816, | |
| "rewards/rejected": -0.8342660069465637, | |
| "rewards/safe_rewards": -0.5949203372001648, | |
| "rewards/unsafe_rewards": -0.6141771674156189, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 9.402406545219676e-08, | |
| "logits/chosen": 0.34590667486190796, | |
| "logits/rejected": 0.8703553080558777, | |
| "logps/chosen": -273.8531188964844, | |
| "logps/rejected": -247.87466430664062, | |
| "loss": 5546.1305, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.6622526049613953, | |
| "rewards/margins": 0.1561700403690338, | |
| "rewards/rejected": -0.8184226751327515, | |
| "rewards/safe_rewards": -0.6668413281440735, | |
| "rewards/unsafe_rewards": -0.6589676141738892, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 9.042988532644249e-08, | |
| "logits/chosen": 0.2142190933227539, | |
| "logits/rejected": 0.5996747016906738, | |
| "logps/chosen": -308.82635498046875, | |
| "logps/rejected": -276.37823486328125, | |
| "loss": 5583.4395, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.5863175392150879, | |
| "rewards/margins": 0.23458845913410187, | |
| "rewards/rejected": -0.8209059834480286, | |
| "rewards/safe_rewards": -0.5638710260391235, | |
| "rewards/unsafe_rewards": -0.5323917269706726, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 8.689052020653592e-08, | |
| "logits/chosen": -0.06605692207813263, | |
| "logits/rejected": 0.6343873739242554, | |
| "logps/chosen": -285.37225341796875, | |
| "logps/rejected": -252.3105010986328, | |
| "loss": 5576.0598, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.5753235816955566, | |
| "rewards/margins": 0.2064014971256256, | |
| "rewards/rejected": -0.7817251086235046, | |
| "rewards/safe_rewards": -0.5231102705001831, | |
| "rewards/unsafe_rewards": -0.5478030443191528, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 8.340718592365037e-08, | |
| "logits/chosen": 0.4551053047180176, | |
| "logits/rejected": 0.6916473507881165, | |
| "logps/chosen": -259.25543212890625, | |
| "logps/rejected": -269.81097412109375, | |
| "loss": 5258.8734, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.6683470010757446, | |
| "rewards/margins": 0.16762246191501617, | |
| "rewards/rejected": -0.8359693288803101, | |
| "rewards/safe_rewards": -0.6167613863945007, | |
| "rewards/unsafe_rewards": -0.6983481645584106, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.998107906142839e-08, | |
| "logits/chosen": 0.4198254942893982, | |
| "logits/rejected": 0.9249162673950195, | |
| "logps/chosen": -256.2335205078125, | |
| "logps/rejected": -243.9502716064453, | |
| "loss": 5150.4359, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6530503034591675, | |
| "rewards/margins": 0.22125795483589172, | |
| "rewards/rejected": -0.8743082880973816, | |
| "rewards/safe_rewards": -0.6435777544975281, | |
| "rewards/unsafe_rewards": -0.6962872743606567, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.661337654493575e-08, | |
| "logits/chosen": 0.11405469477176666, | |
| "logits/rejected": 0.8541787266731262, | |
| "logps/chosen": -285.04632568359375, | |
| "logps/rejected": -264.7653503417969, | |
| "loss": 5838.1379, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.6224103569984436, | |
| "rewards/margins": 0.20319974422454834, | |
| "rewards/rejected": -0.8256100416183472, | |
| "rewards/safe_rewards": -0.6171637773513794, | |
| "rewards/unsafe_rewards": -0.5961381793022156, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.330523523636751e-08, | |
| "logits/chosen": 0.33853933215141296, | |
| "logits/rejected": 0.5890348553657532, | |
| "logps/chosen": -267.7184753417969, | |
| "logps/rejected": -279.6230163574219, | |
| "loss": 5326.7477, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.6186683177947998, | |
| "rewards/margins": 0.19817940890789032, | |
| "rewards/rejected": -0.8168476819992065, | |
| "rewards/safe_rewards": -0.6040722727775574, | |
| "rewards/unsafe_rewards": -0.6181649565696716, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 7.005779153764682e-08, | |
| "logits/chosen": 0.4181288182735443, | |
| "logits/rejected": 0.7393978238105774, | |
| "logps/chosen": -249.9525909423828, | |
| "logps/rejected": -242.4307861328125, | |
| "loss": 5633.5648, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.6368721723556519, | |
| "rewards/margins": 0.15112480521202087, | |
| "rewards/rejected": -0.7879970073699951, | |
| "rewards/safe_rewards": -0.6358110308647156, | |
| "rewards/unsafe_rewards": -0.6208546161651611, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.687216100005138e-08, | |
| "logits/chosen": 0.6848994493484497, | |
| "logits/rejected": 1.1733933687210083, | |
| "logps/chosen": -284.51080322265625, | |
| "logps/rejected": -288.7901916503906, | |
| "loss": 5048.4258, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6762335300445557, | |
| "rewards/margins": 0.1719200611114502, | |
| "rewards/rejected": -0.8481537103652954, | |
| "rewards/safe_rewards": -0.6376355290412903, | |
| "rewards/unsafe_rewards": -0.7184177041053772, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.374943794100349e-08, | |
| "logits/chosen": 0.48638778924942017, | |
| "logits/rejected": 1.259670615196228, | |
| "logps/chosen": -267.34588623046875, | |
| "logps/rejected": -245.59756469726562, | |
| "loss": 5545.4941, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.6003537178039551, | |
| "rewards/margins": 0.22699756920337677, | |
| "rewards/rejected": -0.8273512721061707, | |
| "rewards/safe_rewards": -0.6312727332115173, | |
| "rewards/unsafe_rewards": -0.6281502842903137, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.069069506815325e-08, | |
| "logits/chosen": 0.7533052563667297, | |
| "logits/rejected": 1.2028855085372925, | |
| "logps/chosen": -251.12496948242188, | |
| "logps/rejected": -253.78408813476562, | |
| "loss": 5749.5141, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.6362664103507996, | |
| "rewards/margins": 0.2198611944913864, | |
| "rewards/rejected": -0.8561276197433472, | |
| "rewards/safe_rewards": -0.622052013874054, | |
| "rewards/unsafe_rewards": -0.704675555229187, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_logits/chosen": 1.0718276500701904, | |
| "eval_logits/rejected": 1.9546749591827393, | |
| "eval_logps/chosen": -228.9304656982422, | |
| "eval_logps/rejected": -199.36412048339844, | |
| "eval_loss": 4458.44287109375, | |
| "eval_rewards/accuracies": 0.6194114685058594, | |
| "eval_rewards/chosen": -0.8858092427253723, | |
| "eval_rewards/margins": 0.0865015909075737, | |
| "eval_rewards/rejected": -0.9723107814788818, | |
| "eval_rewards/safe_rewards": -0.874053955078125, | |
| "eval_rewards/unsafe_rewards": -0.8699882626533508, | |
| "eval_runtime": 2349.2554, | |
| "eval_samples_per_second": 14.917, | |
| "eval_steps_per_second": 0.467, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.7696983110885746e-08, | |
| "logits/chosen": 1.0346394777297974, | |
| "logits/rejected": 1.4075425863265991, | |
| "logps/chosen": -264.0049133300781, | |
| "logps/rejected": -256.81793212890625, | |
| "loss": 5875.7254, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.7450360059738159, | |
| "rewards/margins": 0.13777832686901093, | |
| "rewards/rejected": -0.8828142881393433, | |
| "rewards/safe_rewards": -0.6767371892929077, | |
| "rewards/unsafe_rewards": -0.7506189942359924, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.47693304593777e-08, | |
| "logits/chosen": 0.577034056186676, | |
| "logits/rejected": 1.2275969982147217, | |
| "logps/chosen": -280.673583984375, | |
| "logps/rejected": -243.10635375976562, | |
| "loss": 5531.6125, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.6422880291938782, | |
| "rewards/margins": 0.22371160984039307, | |
| "rewards/rejected": -0.8659995794296265, | |
| "rewards/safe_rewards": -0.5432512164115906, | |
| "rewards/unsafe_rewards": -0.6611617803573608, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.190874281132851e-08, | |
| "logits/chosen": 0.6209213733673096, | |
| "logits/rejected": 0.9749325513839722, | |
| "logps/chosen": -258.8196716308594, | |
| "logps/rejected": -247.3189697265625, | |
| "loss": 5541.2727, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.6575254201889038, | |
| "rewards/margins": 0.12947872281074524, | |
| "rewards/rejected": -0.7870042324066162, | |
| "rewards/safe_rewards": -0.7655413746833801, | |
| "rewards/unsafe_rewards": -0.7101870775222778, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.9116202826486045e-08, | |
| "logits/chosen": 0.7310935258865356, | |
| "logits/rejected": 1.0775771141052246, | |
| "logps/chosen": -272.3906555175781, | |
| "logps/rejected": -257.2728271484375, | |
| "loss": 5545.8492, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.6876263618469238, | |
| "rewards/margins": 0.16089771687984467, | |
| "rewards/rejected": -0.8485240936279297, | |
| "rewards/safe_rewards": -0.6295339465141296, | |
| "rewards/unsafe_rewards": -0.7383956909179688, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.639266978908676e-08, | |
| "logits/chosen": 0.6267167329788208, | |
| "logits/rejected": 1.1266528367996216, | |
| "logps/chosen": -297.58380126953125, | |
| "logps/rejected": -271.4803161621094, | |
| "loss": 5131.627, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.6685757637023926, | |
| "rewards/margins": 0.18729698657989502, | |
| "rewards/rejected": -0.8558727502822876, | |
| "rewards/safe_rewards": -0.6740354299545288, | |
| "rewards/unsafe_rewards": -0.6281224489212036, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.373907927832513e-08, | |
| "logits/chosen": 0.6049357056617737, | |
| "logits/rejected": 0.9919975996017456, | |
| "logps/chosen": -265.62481689453125, | |
| "logps/rejected": -285.9028625488281, | |
| "loss": 5640.1398, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.6182764172554016, | |
| "rewards/margins": 0.22418944537639618, | |
| "rewards/rejected": -0.842465877532959, | |
| "rewards/safe_rewards": -0.6555901765823364, | |
| "rewards/unsafe_rewards": -0.5656682848930359, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.115634284696698e-08, | |
| "logits/chosen": 0.49705711007118225, | |
| "logits/rejected": 0.9479654431343079, | |
| "logps/chosen": -261.2461853027344, | |
| "logps/rejected": -270.83331298828125, | |
| "loss": 5189.8301, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.6632257699966431, | |
| "rewards/margins": 0.21208517253398895, | |
| "rewards/rejected": -0.8753108978271484, | |
| "rewards/safe_rewards": -0.6663291454315186, | |
| "rewards/unsafe_rewards": -0.6038998365402222, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.864534770821559e-08, | |
| "logits/chosen": 0.6149829626083374, | |
| "logits/rejected": 1.1939442157745361, | |
| "logps/chosen": -262.00933837890625, | |
| "logps/rejected": -240.24581909179688, | |
| "loss": 5618.5883, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.6275893449783325, | |
| "rewards/margins": 0.20411472022533417, | |
| "rewards/rejected": -0.8317041397094727, | |
| "rewards/safe_rewards": -0.6472023725509644, | |
| "rewards/unsafe_rewards": -0.5557063817977905, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.620695643093924e-08, | |
| "logits/chosen": 0.43840399384498596, | |
| "logits/rejected": 1.105423092842102, | |
| "logps/chosen": -269.2837829589844, | |
| "logps/rejected": -238.085205078125, | |
| "loss": 5468.3313, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.6394304037094116, | |
| "rewards/margins": 0.22106070816516876, | |
| "rewards/rejected": -0.860491156578064, | |
| "rewards/safe_rewards": -0.6031507849693298, | |
| "rewards/unsafe_rewards": -0.6791771650314331, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.384200664336412e-08, | |
| "logits/chosen": 0.5348480343818665, | |
| "logits/rejected": 1.0058144330978394, | |
| "logps/chosen": -268.3987731933594, | |
| "logps/rejected": -247.79696655273438, | |
| "loss": 5660.3645, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.5938838720321655, | |
| "rewards/margins": 0.21732494235038757, | |
| "rewards/rejected": -0.8112088441848755, | |
| "rewards/safe_rewards": -0.5639302134513855, | |
| "rewards/unsafe_rewards": -0.6350196599960327, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.155131074533529e-08, | |
| "logits/chosen": 0.30334433913230896, | |
| "logits/rejected": 0.9854658246040344, | |
| "logps/chosen": -283.627685546875, | |
| "logps/rejected": -263.83251953125, | |
| "loss": 6043.9172, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.6394412517547607, | |
| "rewards/margins": 0.1600230187177658, | |
| "rewards/rejected": -0.7994643449783325, | |
| "rewards/safe_rewards": -0.6199285387992859, | |
| "rewards/unsafe_rewards": -0.6412296295166016, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 2.9335655629243645e-08, | |
| "logits/chosen": 0.39362573623657227, | |
| "logits/rejected": 0.9285033941268921, | |
| "logps/chosen": -270.2079162597656, | |
| "logps/rejected": -261.9796447753906, | |
| "loss": 5957.5516, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.6112038493156433, | |
| "rewards/margins": 0.18837173283100128, | |
| "rewards/rejected": -0.7995756268501282, | |
| "rewards/safe_rewards": -0.6032061576843262, | |
| "rewards/unsafe_rewards": -0.6732661724090576, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 2.7195802409715197e-08, | |
| "logits/chosen": 0.2444291114807129, | |
| "logits/rejected": 0.9499914050102234, | |
| "logps/chosen": -298.4200134277344, | |
| "logps/rejected": -249.72866821289062, | |
| "loss": 5750.8313, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.6592567563056946, | |
| "rewards/margins": 0.1407555341720581, | |
| "rewards/rejected": -0.8000122904777527, | |
| "rewards/safe_rewards": -0.7100226283073425, | |
| "rewards/unsafe_rewards": -0.7015893459320068, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.513248616215527e-08, | |
| "logits/chosen": 0.3666357100009918, | |
| "logits/rejected": 0.9415947198867798, | |
| "logps/chosen": -277.87518310546875, | |
| "logps/rejected": -276.29119873046875, | |
| "loss": 5205.8715, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.6106274724006653, | |
| "rewards/margins": 0.24805088341236115, | |
| "rewards/rejected": -0.8586783409118652, | |
| "rewards/safe_rewards": -0.6150985956192017, | |
| "rewards/unsafe_rewards": -0.594727635383606, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.31464156702382e-08, | |
| "logits/chosen": 0.24014464020729065, | |
| "logits/rejected": 0.9577549695968628, | |
| "logps/chosen": -292.7112121582031, | |
| "logps/rejected": -265.7065734863281, | |
| "loss": 5896.8078, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.5955285429954529, | |
| "rewards/margins": 0.2333928644657135, | |
| "rewards/rejected": -0.8289214372634888, | |
| "rewards/safe_rewards": -0.6319350600242615, | |
| "rewards/unsafe_rewards": -0.5868616104125977, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.1238273182427933e-08, | |
| "logits/chosen": 0.6973511576652527, | |
| "logits/rejected": 1.2915074825286865, | |
| "logps/chosen": -265.3111572265625, | |
| "logps/rejected": -251.41201782226562, | |
| "loss": 5434.0336, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.6617192029953003, | |
| "rewards/margins": 0.19598451256752014, | |
| "rewards/rejected": -0.857703685760498, | |
| "rewards/safe_rewards": -0.6422809362411499, | |
| "rewards/unsafe_rewards": -0.6228102445602417, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.9408714177614306e-08, | |
| "logits/chosen": 0.5173779726028442, | |
| "logits/rejected": 1.02643883228302, | |
| "logps/chosen": -268.9621887207031, | |
| "logps/rejected": -251.25808715820312, | |
| "loss": 5243.4758, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.6187028288841248, | |
| "rewards/margins": 0.22567462921142578, | |
| "rewards/rejected": -0.8443773984909058, | |
| "rewards/safe_rewards": -0.6375213265419006, | |
| "rewards/unsafe_rewards": -0.6421637535095215, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.7658367139945228e-08, | |
| "logits/chosen": 0.6539649963378906, | |
| "logits/rejected": 1.0953106880187988, | |
| "logps/chosen": -288.9885559082031, | |
| "logps/rejected": -259.146728515625, | |
| "loss": 5246.4344, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.6686577200889587, | |
| "rewards/margins": 0.19176754355430603, | |
| "rewards/rejected": -0.8604252934455872, | |
| "rewards/safe_rewards": -0.7045280933380127, | |
| "rewards/unsafe_rewards": -0.7155130505561829, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.5987833342931745e-08, | |
| "logits/chosen": 0.4664410650730133, | |
| "logits/rejected": 1.215132236480713, | |
| "logps/chosen": -284.1900939941406, | |
| "logps/rejected": -251.48379516601562, | |
| "loss": 5564.9324, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.6805782318115234, | |
| "rewards/margins": 0.21095602214336395, | |
| "rewards/rejected": -0.8915343284606934, | |
| "rewards/safe_rewards": -0.67192143201828, | |
| "rewards/unsafe_rewards": -0.6578537821769714, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.439768664290053e-08, | |
| "logits/chosen": 0.48882967233657837, | |
| "logits/rejected": 1.0205453634262085, | |
| "logps/chosen": -288.0510559082031, | |
| "logps/rejected": -263.57122802734375, | |
| "loss": 5705.5039, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.6453284025192261, | |
| "rewards/margins": 0.18227383494377136, | |
| "rewards/rejected": -0.827602207660675, | |
| "rewards/safe_rewards": -0.6023403406143188, | |
| "rewards/unsafe_rewards": -0.6489912867546082, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.2888473281864597e-08, | |
| "logits/chosen": 0.3580858111381531, | |
| "logits/rejected": 0.9355760812759399, | |
| "logps/chosen": -252.00344848632812, | |
| "logps/rejected": -256.7703552246094, | |
| "loss": 5420.7055, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.6472461819648743, | |
| "rewards/margins": 0.19622859358787537, | |
| "rewards/rejected": -0.8434747457504272, | |
| "rewards/safe_rewards": -0.6663787364959717, | |
| "rewards/unsafe_rewards": -0.6997274160385132, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.1460711699880082e-08, | |
| "logits/chosen": 0.32274478673934937, | |
| "logits/rejected": 0.9183855056762695, | |
| "logps/chosen": -281.06304931640625, | |
| "logps/rejected": -268.91278076171875, | |
| "loss": 5609.357, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.5867010951042175, | |
| "rewards/margins": 0.23433193564414978, | |
| "rewards/rejected": -0.8210331201553345, | |
| "rewards/safe_rewards": -0.5630391240119934, | |
| "rewards/unsafe_rewards": -0.6277604103088379, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.0114892356953397e-08, | |
| "logits/chosen": 0.381804883480072, | |
| "logits/rejected": 0.9557956457138062, | |
| "logps/chosen": -278.6263427734375, | |
| "logps/rejected": -252.7932891845703, | |
| "loss": 5676.834, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.6421754360198975, | |
| "rewards/margins": 0.1775234043598175, | |
| "rewards/rejected": -0.8196988105773926, | |
| "rewards/safe_rewards": -0.6115553379058838, | |
| "rewards/unsafe_rewards": -0.6476501226425171, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 8.851477564560061e-09, | |
| "logits/chosen": 0.5100737810134888, | |
| "logits/rejected": 0.932380199432373, | |
| "logps/chosen": -263.25146484375, | |
| "logps/rejected": -271.11676025390625, | |
| "loss": 5593.4414, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.6300482749938965, | |
| "rewards/margins": 0.25807589292526245, | |
| "rewards/rejected": -0.8881241679191589, | |
| "rewards/safe_rewards": -0.6826761960983276, | |
| "rewards/unsafe_rewards": -0.6732330322265625, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 7.670901326832763e-09, | |
| "logits/chosen": 0.6556006669998169, | |
| "logits/rejected": 1.0529851913452148, | |
| "logps/chosen": -272.6200866699219, | |
| "logps/rejected": -291.10101318359375, | |
| "loss": 5333.684, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.7119321823120117, | |
| "rewards/margins": 0.18222954869270325, | |
| "rewards/rejected": -0.8941618204116821, | |
| "rewards/safe_rewards": -0.7450841069221497, | |
| "rewards/unsafe_rewards": -0.6783844232559204, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 6.5735691914738936e-09, | |
| "logits/chosen": 0.3428182005882263, | |
| "logits/rejected": 0.6993114948272705, | |
| "logps/chosen": -276.2501220703125, | |
| "logps/rejected": -270.787841796875, | |
| "loss": 6014.7414, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.6672028303146362, | |
| "rewards/margins": 0.16263318061828613, | |
| "rewards/rejected": -0.8298360109329224, | |
| "rewards/safe_rewards": -0.6557270288467407, | |
| "rewards/unsafe_rewards": -0.7067701816558838, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 5.559858110443016e-09, | |
| "logits/chosen": 0.3265165388584137, | |
| "logits/rejected": 0.9415761828422546, | |
| "logps/chosen": -279.380615234375, | |
| "logps/rejected": -258.53887939453125, | |
| "loss": 5329.075, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.6516368985176086, | |
| "rewards/margins": 0.22732026875019073, | |
| "rewards/rejected": -0.8789570927619934, | |
| "rewards/safe_rewards": -0.6853364706039429, | |
| "rewards/unsafe_rewards": -0.6284711360931396, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.6301163104676685e-09, | |
| "logits/chosen": 0.5433076620101929, | |
| "logits/rejected": 0.899452805519104, | |
| "logps/chosen": -262.05511474609375, | |
| "logps/rejected": -280.93658447265625, | |
| "loss": 5452.5277, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6632400751113892, | |
| "rewards/margins": 0.19723954796791077, | |
| "rewards/rejected": -0.8604797124862671, | |
| "rewards/safe_rewards": -0.5747020244598389, | |
| "rewards/unsafe_rewards": -0.6066412329673767, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.784663173421438e-09, | |
| "logits/chosen": 0.47608470916748047, | |
| "logits/rejected": 0.8737590909004211, | |
| "logps/chosen": -294.0523376464844, | |
| "logps/rejected": -280.8829650878906, | |
| "loss": 5532.6391, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.6354952454566956, | |
| "rewards/margins": 0.18091240525245667, | |
| "rewards/rejected": -0.8164075613021851, | |
| "rewards/safe_rewards": -0.6999973654747009, | |
| "rewards/unsafe_rewards": -0.6226142644882202, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.023789126611137e-09, | |
| "logits/chosen": 0.6358956694602966, | |
| "logits/rejected": 1.2913506031036377, | |
| "logps/chosen": -276.2715148925781, | |
| "logps/rejected": -243.6599884033203, | |
| "loss": 5192.1734, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.6617811918258667, | |
| "rewards/margins": 0.21255967020988464, | |
| "rewards/rejected": -0.874340832233429, | |
| "rewards/safe_rewards": -0.665223240852356, | |
| "rewards/unsafe_rewards": -0.67181396484375, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.3477555430100604e-09, | |
| "logits/chosen": 0.5863360166549683, | |
| "logits/rejected": 1.0950720310211182, | |
| "logps/chosen": -270.6855773925781, | |
| "logps/rejected": -254.65771484375, | |
| "loss": 5546.9984, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.5831121206283569, | |
| "rewards/margins": 0.2669592499732971, | |
| "rewards/rejected": -0.8500713109970093, | |
| "rewards/safe_rewards": -0.586032509803772, | |
| "rewards/unsafe_rewards": -0.577675461769104, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.7567946514721322e-09, | |
| "logits/chosen": 0.6444328427314758, | |
| "logits/rejected": 1.0208208560943604, | |
| "logps/chosen": -269.35577392578125, | |
| "logps/rejected": -271.528564453125, | |
| "loss": 5601.7539, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.6750708818435669, | |
| "rewards/margins": 0.19110876321792603, | |
| "rewards/rejected": -0.8661795854568481, | |
| "rewards/safe_rewards": -0.6811034679412842, | |
| "rewards/unsafe_rewards": -0.7294248342514038, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.2511094569571668e-09, | |
| "logits/chosen": 0.3397526741027832, | |
| "logits/rejected": 1.0616391897201538, | |
| "logps/chosen": -257.86822509765625, | |
| "logps/rejected": -244.8105926513672, | |
| "loss": 5620.3375, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.631868302822113, | |
| "rewards/margins": 0.2000071257352829, | |
| "rewards/rejected": -0.8318754434585571, | |
| "rewards/safe_rewards": -0.5972138047218323, | |
| "rewards/unsafe_rewards": -0.6459835171699524, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.308736707954289e-10, | |
| "logits/chosen": 0.518609881401062, | |
| "logits/rejected": 1.1488319635391235, | |
| "logps/chosen": -273.81390380859375, | |
| "logps/rejected": -240.91372680664062, | |
| "loss": 5548.0289, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.6856581568717957, | |
| "rewards/margins": 0.2014351636171341, | |
| "rewards/rejected": -0.8870933651924133, | |
| "rewards/safe_rewards": -0.6684737205505371, | |
| "rewards/unsafe_rewards": -0.694146990776062, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.962316510149222e-10, | |
| "logits/chosen": 0.3395392894744873, | |
| "logits/rejected": 1.0089718103408813, | |
| "logps/chosen": -252.1464080810547, | |
| "logps/rejected": -241.22982788085938, | |
| "loss": 5356.7621, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.6337156295776367, | |
| "rewards/margins": 0.2152295857667923, | |
| "rewards/rejected": -0.8489452600479126, | |
| "rewards/safe_rewards": -0.6431758403778076, | |
| "rewards/unsafe_rewards": -0.6494039297103882, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.4729835275189016e-10, | |
| "logits/chosen": 0.5798267722129822, | |
| "logits/rejected": 0.9745955467224121, | |
| "logps/chosen": -243.1245574951172, | |
| "logps/rejected": -238.126220703125, | |
| "loss": 5836.127, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.6284788846969604, | |
| "rewards/margins": 0.2039627581834793, | |
| "rewards/rejected": -0.8324416279792786, | |
| "rewards/safe_rewards": -0.5914771556854248, | |
| "rewards/unsafe_rewards": -0.6241937279701233, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.415928876176482e-11, | |
| "logits/chosen": 0.4843016564846039, | |
| "logits/rejected": 0.8851835131645203, | |
| "logps/chosen": -258.23773193359375, | |
| "logps/rejected": -251.73001098632812, | |
| "loss": 6036.282, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.6951759457588196, | |
| "rewards/margins": 0.1390235722064972, | |
| "rewards/rejected": -0.8341996073722839, | |
| "rewards/safe_rewards": -0.7087674140930176, | |
| "rewards/unsafe_rewards": -0.712031900882721, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 6.870500044303673e-12, | |
| "logits/chosen": 0.5293042063713074, | |
| "logits/rejected": 0.8430191874504089, | |
| "logps/chosen": -253.91397094726562, | |
| "logps/rejected": -270.7514953613281, | |
| "loss": 5497.6977, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.5842832326889038, | |
| "rewards/margins": 0.209587961435318, | |
| "rewards/rejected": -0.7938712239265442, | |
| "rewards/safe_rewards": -0.6020101308822632, | |
| "rewards/unsafe_rewards": -0.6186091303825378, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1884, | |
| "total_flos": 0.0, | |
| "train_loss": 5859.617769083399, | |
| "train_runtime": 32772.3871, | |
| "train_samples_per_second": 3.68, | |
| "train_steps_per_second": 0.057 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1884, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |