| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9995965030262273, | |
| "eval_steps": 500, | |
| "global_step": 1858, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.6881720430107528e-09, | |
| "logits/chosen": -2.4663572311401367, | |
| "logits/rejected": -2.057170867919922, | |
| "logps/chosen": -246.4422607421875, | |
| "logps/rejected": -173.7652587890625, | |
| "loss": 0.5938, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "rewards/safe_rewards": 0.0, | |
| "rewards/unsafe_rewards": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.6881720430107527e-08, | |
| "logits/chosen": -2.3338096141815186, | |
| "logits/rejected": -2.1100988388061523, | |
| "logps/chosen": -199.19329833984375, | |
| "logps/rejected": -169.358642578125, | |
| "loss": 1.134, | |
| "rewards/accuracies": 0.4097222089767456, | |
| "rewards/chosen": -0.03551425039768219, | |
| "rewards/margins": -0.041799187660217285, | |
| "rewards/rejected": 0.006284935399889946, | |
| "rewards/safe_rewards": -0.01677405834197998, | |
| "rewards/unsafe_rewards": -0.0542544424533844, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5.3763440860215054e-08, | |
| "logits/chosen": -2.329479694366455, | |
| "logits/rejected": -2.0858876705169678, | |
| "logps/chosen": -215.32296752929688, | |
| "logps/rejected": -176.8864288330078, | |
| "loss": 1.1266, | |
| "rewards/accuracies": 0.47187501192092896, | |
| "rewards/chosen": -0.031086910516023636, | |
| "rewards/margins": -0.04154179245233536, | |
| "rewards/rejected": 0.010454884730279446, | |
| "rewards/safe_rewards": -0.04110833257436752, | |
| "rewards/unsafe_rewards": -0.021065494045615196, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8.064516129032257e-08, | |
| "logits/chosen": -2.322885036468506, | |
| "logits/rejected": -2.1038832664489746, | |
| "logps/chosen": -199.3030242919922, | |
| "logps/rejected": -180.7991943359375, | |
| "loss": 1.1716, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": 0.0008645713096484542, | |
| "rewards/margins": 0.027558892965316772, | |
| "rewards/rejected": -0.026694318279623985, | |
| "rewards/safe_rewards": -0.0032820613123476505, | |
| "rewards/unsafe_rewards": 0.005011203698813915, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.0752688172043011e-07, | |
| "logits/chosen": -2.268714427947998, | |
| "logits/rejected": -1.9988443851470947, | |
| "logps/chosen": -197.72109985351562, | |
| "logps/rejected": -177.70603942871094, | |
| "loss": 1.1036, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.023218240588903427, | |
| "rewards/margins": 0.022794129326939583, | |
| "rewards/rejected": 0.0004241138813085854, | |
| "rewards/safe_rewards": 0.03502867370843887, | |
| "rewards/unsafe_rewards": 0.011407810263335705, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.3440860215053762e-07, | |
| "logits/chosen": -2.374366283416748, | |
| "logits/rejected": -2.07818603515625, | |
| "logps/chosen": -191.63714599609375, | |
| "logps/rejected": -162.17771911621094, | |
| "loss": 1.1473, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.015013009309768677, | |
| "rewards/margins": 0.0906001627445221, | |
| "rewards/rejected": -0.10561318695545197, | |
| "rewards/safe_rewards": -0.018471335992217064, | |
| "rewards/unsafe_rewards": -0.011554678902029991, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.6129032258064515e-07, | |
| "logits/chosen": -2.346019983291626, | |
| "logits/rejected": -2.1285576820373535, | |
| "logps/chosen": -186.499755859375, | |
| "logps/rejected": -175.0586700439453, | |
| "loss": 1.0107, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.009731076657772064, | |
| "rewards/margins": 0.04699288681149483, | |
| "rewards/rejected": -0.037261806428432465, | |
| "rewards/safe_rewards": -0.01583387330174446, | |
| "rewards/unsafe_rewards": 0.03529602661728859, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.8817204301075268e-07, | |
| "logits/chosen": -2.3234503269195557, | |
| "logits/rejected": -2.110891819000244, | |
| "logps/chosen": -221.27426147460938, | |
| "logps/rejected": -179.11380004882812, | |
| "loss": 2.1985, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.11594200134277344, | |
| "rewards/margins": 0.07270021736621857, | |
| "rewards/rejected": 0.04324179142713547, | |
| "rewards/safe_rewards": 0.0875079482793808, | |
| "rewards/unsafe_rewards": 0.14437603950500488, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.1505376344086022e-07, | |
| "logits/chosen": -2.3453927040100098, | |
| "logits/rejected": -2.1327505111694336, | |
| "logps/chosen": -197.19949340820312, | |
| "logps/rejected": -176.77151489257812, | |
| "loss": 2.7155, | |
| "rewards/accuracies": 0.4468750059604645, | |
| "rewards/chosen": 0.15263572335243225, | |
| "rewards/margins": 0.012048400938510895, | |
| "rewards/rejected": 0.14058732986450195, | |
| "rewards/safe_rewards": 0.18941155076026917, | |
| "rewards/unsafe_rewards": 0.11585988849401474, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.4193548387096775e-07, | |
| "logits/chosen": -2.3641719818115234, | |
| "logits/rejected": -2.137413263320923, | |
| "logps/chosen": -216.1211395263672, | |
| "logps/rejected": -168.5092315673828, | |
| "loss": 2.721, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": 0.008618640713393688, | |
| "rewards/margins": 0.006397470831871033, | |
| "rewards/rejected": 0.002221171511337161, | |
| "rewards/safe_rewards": 0.031759221106767654, | |
| "rewards/unsafe_rewards": -0.014521944336593151, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.6881720430107523e-07, | |
| "logits/chosen": -2.360917568206787, | |
| "logits/rejected": -2.153608798980713, | |
| "logps/chosen": -201.7233428955078, | |
| "logps/rejected": -190.54605102539062, | |
| "loss": 1.4712, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": 0.07169636338949203, | |
| "rewards/margins": 0.08422265201807022, | |
| "rewards/rejected": -0.012526283040642738, | |
| "rewards/safe_rewards": 0.09221886098384857, | |
| "rewards/unsafe_rewards": 0.0511738546192646, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.956989247311828e-07, | |
| "logits/chosen": -2.3796088695526123, | |
| "logits/rejected": -2.148357629776001, | |
| "logps/chosen": -207.0086212158203, | |
| "logps/rejected": -176.24658203125, | |
| "loss": 4.9646, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": 0.0011837140191346407, | |
| "rewards/margins": 0.02744489349424839, | |
| "rewards/rejected": -0.02626117691397667, | |
| "rewards/safe_rewards": -0.013010969385504723, | |
| "rewards/unsafe_rewards": 0.015378397889435291, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.225806451612903e-07, | |
| "logits/chosen": -2.378938913345337, | |
| "logits/rejected": -2.1289939880371094, | |
| "logps/chosen": -203.86172485351562, | |
| "logps/rejected": -168.72509765625, | |
| "loss": 5.8793, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": 0.11708948761224747, | |
| "rewards/margins": -0.0013303399318829179, | |
| "rewards/rejected": 0.1184198409318924, | |
| "rewards/safe_rewards": 0.14375139772891998, | |
| "rewards/unsafe_rewards": 0.09042758494615555, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.4946236559139783e-07, | |
| "logits/chosen": -2.4672460556030273, | |
| "logits/rejected": -2.235044479370117, | |
| "logps/chosen": -211.15414428710938, | |
| "logps/rejected": -167.7396697998047, | |
| "loss": 2.9066, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": -0.141743004322052, | |
| "rewards/margins": 0.018900588154792786, | |
| "rewards/rejected": -0.1606435775756836, | |
| "rewards/safe_rewards": -0.20868048071861267, | |
| "rewards/unsafe_rewards": -0.07480548322200775, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 3.7634408602150537e-07, | |
| "logits/chosen": -2.469130516052246, | |
| "logits/rejected": -2.2549142837524414, | |
| "logps/chosen": -219.2992401123047, | |
| "logps/rejected": -180.1728057861328, | |
| "loss": 14.0865, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": 0.04582630842924118, | |
| "rewards/margins": 0.07086005806922913, | |
| "rewards/rejected": -0.025033747777342796, | |
| "rewards/safe_rewards": 0.05242709070444107, | |
| "rewards/unsafe_rewards": 0.03922552615404129, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.0322580645161285e-07, | |
| "logits/chosen": -2.4029898643493652, | |
| "logits/rejected": -2.2180120944976807, | |
| "logps/chosen": -205.2784881591797, | |
| "logps/rejected": -167.4949951171875, | |
| "loss": 1531.726, | |
| "rewards/accuracies": 0.503125011920929, | |
| "rewards/chosen": -0.9918906092643738, | |
| "rewards/margins": 0.18755348026752472, | |
| "rewards/rejected": -1.1794440746307373, | |
| "rewards/safe_rewards": -0.9263350367546082, | |
| "rewards/unsafe_rewards": -1.0574461221694946, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.3010752688172043e-07, | |
| "logits/chosen": -2.3345110416412354, | |
| "logits/rejected": -2.1149539947509766, | |
| "logps/chosen": -209.245849609375, | |
| "logps/rejected": -186.2938995361328, | |
| "loss": 76.4742, | |
| "rewards/accuracies": 0.4468750059604645, | |
| "rewards/chosen": -0.40810996294021606, | |
| "rewards/margins": -0.12766215205192566, | |
| "rewards/rejected": -0.280447781085968, | |
| "rewards/safe_rewards": -0.5669787526130676, | |
| "rewards/unsafe_rewards": -0.2492411583662033, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.569892473118279e-07, | |
| "logits/chosen": -2.364396810531616, | |
| "logits/rejected": -2.153006076812744, | |
| "logps/chosen": -193.48985290527344, | |
| "logps/rejected": -157.84793090820312, | |
| "loss": 366.3049, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.4611927568912506, | |
| "rewards/margins": -0.09943069517612457, | |
| "rewards/rejected": -0.36176207661628723, | |
| "rewards/safe_rewards": -0.3680972456932068, | |
| "rewards/unsafe_rewards": -0.5542882680892944, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.838709677419355e-07, | |
| "logits/chosen": -2.379281520843506, | |
| "logits/rejected": -2.1527695655822754, | |
| "logps/chosen": -201.44384765625, | |
| "logps/rejected": -176.66293334960938, | |
| "loss": 212.1672, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -1.0156188011169434, | |
| "rewards/margins": 0.32616162300109863, | |
| "rewards/rejected": -1.341780424118042, | |
| "rewards/safe_rewards": -0.7996016144752502, | |
| "rewards/unsafe_rewards": -1.2316361665725708, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.999929391798331e-07, | |
| "logits/chosen": -2.4363088607788086, | |
| "logits/rejected": -2.1664328575134277, | |
| "logps/chosen": -214.74972534179688, | |
| "logps/rejected": -172.83267211914062, | |
| "loss": 281.1637, | |
| "rewards/accuracies": 0.4781250059604645, | |
| "rewards/chosen": -2.0342252254486084, | |
| "rewards/margins": -1.3259267807006836, | |
| "rewards/rejected": -0.7082984447479248, | |
| "rewards/safe_rewards": -1.9764223098754883, | |
| "rewards/unsafe_rewards": -2.0920281410217285, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9991350953333e-07, | |
| "logits/chosen": -2.399965763092041, | |
| "logits/rejected": -2.1533687114715576, | |
| "logps/chosen": -211.14138793945312, | |
| "logps/rejected": -183.2847442626953, | |
| "loss": 37.4693, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": -0.5885945558547974, | |
| "rewards/margins": -0.33234477043151855, | |
| "rewards/rejected": -0.2562498152256012, | |
| "rewards/safe_rewards": 0.10301212966442108, | |
| "rewards/unsafe_rewards": -1.2802014350891113, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.997458523498236e-07, | |
| "logits/chosen": -2.4136548042297363, | |
| "logits/rejected": -2.1710681915283203, | |
| "logps/chosen": -192.46209716796875, | |
| "logps/rejected": -160.3273468017578, | |
| "loss": 19.4933, | |
| "rewards/accuracies": 0.46562498807907104, | |
| "rewards/chosen": 0.7500754594802856, | |
| "rewards/margins": 0.07740475982427597, | |
| "rewards/rejected": 0.6726706624031067, | |
| "rewards/safe_rewards": 0.8147931098937988, | |
| "rewards/unsafe_rewards": 0.6853577494621277, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.99490026817712e-07, | |
| "logits/chosen": -2.3793249130249023, | |
| "logits/rejected": -2.126897096633911, | |
| "logps/chosen": -206.8174591064453, | |
| "logps/rejected": -174.28512573242188, | |
| "loss": 618.2743, | |
| "rewards/accuracies": 0.4593749940395355, | |
| "rewards/chosen": 0.6119144558906555, | |
| "rewards/margins": 1.0083643198013306, | |
| "rewards/rejected": -0.3964497447013855, | |
| "rewards/safe_rewards": 0.269029825925827, | |
| "rewards/unsafe_rewards": 0.9547992944717407, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.991461232516674e-07, | |
| "logits/chosen": -2.278285503387451, | |
| "logits/rejected": -2.0165598392486572, | |
| "logps/chosen": -220.05496215820312, | |
| "logps/rejected": -191.4230499267578, | |
| "loss": 117.4644, | |
| "rewards/accuracies": 0.44062501192092896, | |
| "rewards/chosen": -2.331136465072632, | |
| "rewards/margins": -0.27771270275115967, | |
| "rewards/rejected": -2.053424119949341, | |
| "rewards/safe_rewards": -1.6258525848388672, | |
| "rewards/unsafe_rewards": -3.0364208221435547, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.98714263060751e-07, | |
| "logits/chosen": -2.2665092945098877, | |
| "logits/rejected": -1.9782488346099854, | |
| "logps/chosen": -189.6136016845703, | |
| "logps/rejected": -156.85269165039062, | |
| "loss": 123.5274, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -1.6799499988555908, | |
| "rewards/margins": -0.4719271659851074, | |
| "rewards/rejected": -1.2080228328704834, | |
| "rewards/safe_rewards": -1.867531418800354, | |
| "rewards/unsafe_rewards": -1.4923683404922485, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.98194598705552e-07, | |
| "logits/chosen": -2.2388875484466553, | |
| "logits/rejected": -2.0419199466705322, | |
| "logps/chosen": -203.91488647460938, | |
| "logps/rejected": -175.87570190429688, | |
| "loss": 29.462, | |
| "rewards/accuracies": 0.5406249761581421, | |
| "rewards/chosen": -0.5732041597366333, | |
| "rewards/margins": 0.3381038308143616, | |
| "rewards/rejected": -0.9113079905509949, | |
| "rewards/safe_rewards": -0.5594094395637512, | |
| "rewards/unsafe_rewards": -0.5869989395141602, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.975873136443648e-07, | |
| "logits/chosen": -2.323503017425537, | |
| "logits/rejected": -2.1084866523742676, | |
| "logps/chosen": -219.4092254638672, | |
| "logps/rejected": -188.0467071533203, | |
| "loss": 514.7106, | |
| "rewards/accuracies": 0.5218750238418579, | |
| "rewards/chosen": 0.01872560940682888, | |
| "rewards/margins": 0.048060666769742966, | |
| "rewards/rejected": -0.029335061088204384, | |
| "rewards/safe_rewards": -0.101626917719841, | |
| "rewards/unsafe_rewards": 0.13907812535762787, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.968926222684212e-07, | |
| "logits/chosen": -2.3192670345306396, | |
| "logits/rejected": -2.128873586654663, | |
| "logps/chosen": -195.8466796875, | |
| "logps/rejected": -173.4759063720703, | |
| "loss": 62.0019, | |
| "rewards/accuracies": 0.4781250059604645, | |
| "rewards/chosen": 0.5205889940261841, | |
| "rewards/margins": 0.10105878114700317, | |
| "rewards/rejected": 0.4195302128791809, | |
| "rewards/safe_rewards": 0.4973847270011902, | |
| "rewards/unsafe_rewards": 0.5437930822372437, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.961107698262044e-07, | |
| "logits/chosen": -2.3513216972351074, | |
| "logits/rejected": -2.1132161617279053, | |
| "logps/chosen": -209.58480834960938, | |
| "logps/rejected": -173.8505096435547, | |
| "loss": 19.9099, | |
| "rewards/accuracies": 0.47187501192092896, | |
| "rewards/chosen": 1.4319963455200195, | |
| "rewards/margins": -0.04141209274530411, | |
| "rewards/rejected": 1.4734083414077759, | |
| "rewards/safe_rewards": 0.7625109553337097, | |
| "rewards/unsafe_rewards": 2.1014816761016846, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.952420323368673e-07, | |
| "logits/chosen": -2.327949047088623, | |
| "logits/rejected": -2.081421136856079, | |
| "logps/chosen": -202.83131408691406, | |
| "logps/rejected": -173.12339782714844, | |
| "loss": 166.1931, | |
| "rewards/accuracies": 0.503125011920929, | |
| "rewards/chosen": 1.1239261627197266, | |
| "rewards/margins": 0.29456058144569397, | |
| "rewards/rejected": 0.8293657302856445, | |
| "rewards/safe_rewards": 0.95171719789505, | |
| "rewards/unsafe_rewards": 1.2961351871490479, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.942867164927899e-07, | |
| "logits/chosen": -2.3304100036621094, | |
| "logits/rejected": -2.148871898651123, | |
| "logps/chosen": -200.2861785888672, | |
| "logps/rejected": -173.5687713623047, | |
| "loss": 83.8678, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": 1.1026077270507812, | |
| "rewards/margins": 0.16524335741996765, | |
| "rewards/rejected": 0.9373642206192017, | |
| "rewards/safe_rewards": 1.20353102684021, | |
| "rewards/unsafe_rewards": 1.001684308052063, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.932451595513062e-07, | |
| "logits/chosen": -2.3603804111480713, | |
| "logits/rejected": -2.1054179668426514, | |
| "logps/chosen": -222.5138702392578, | |
| "logps/rejected": -189.41696166992188, | |
| "loss": 125.375, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": 1.2356212139129639, | |
| "rewards/margins": 0.4028751254081726, | |
| "rewards/rejected": 0.8327462077140808, | |
| "rewards/safe_rewards": 1.2047992944717407, | |
| "rewards/unsafe_rewards": 1.2664434909820557, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.921177292156419e-07, | |
| "logits/chosen": -2.4207069873809814, | |
| "logits/rejected": -2.131692409515381, | |
| "logps/chosen": -197.57579040527344, | |
| "logps/rejected": -173.03189086914062, | |
| "loss": 32.4693, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": 1.0067864656448364, | |
| "rewards/margins": -0.07762779295444489, | |
| "rewards/rejected": 1.0844142436981201, | |
| "rewards/safe_rewards": 0.9899358749389648, | |
| "rewards/unsafe_rewards": 1.0236369371414185, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.909048235051033e-07, | |
| "logits/chosen": -2.3886237144470215, | |
| "logits/rejected": -2.2095794677734375, | |
| "logps/chosen": -201.99131774902344, | |
| "logps/rejected": -180.18301391601562, | |
| "loss": 165.1989, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": 0.8952449560165405, | |
| "rewards/margins": 0.1524442732334137, | |
| "rewards/rejected": 0.7428006529808044, | |
| "rewards/safe_rewards": 0.9638195037841797, | |
| "rewards/unsafe_rewards": 0.8266702890396118, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.896068706145631e-07, | |
| "logits/chosen": -2.4264276027679443, | |
| "logits/rejected": -2.1699893474578857, | |
| "logps/chosen": -209.13687133789062, | |
| "logps/rejected": -161.4777374267578, | |
| "loss": 63.6332, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": 0.5357077717781067, | |
| "rewards/margins": 0.20826852321624756, | |
| "rewards/rejected": 0.32743921875953674, | |
| "rewards/safe_rewards": 0.6318890452384949, | |
| "rewards/unsafe_rewards": 0.4395265579223633, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.882243287632946e-07, | |
| "logits/chosen": -2.4155266284942627, | |
| "logits/rejected": -2.1885287761688232, | |
| "logps/chosen": -190.31680297851562, | |
| "logps/rejected": -167.34011840820312, | |
| "loss": 22.5493, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.2450559437274933, | |
| "rewards/margins": 0.11199624836444855, | |
| "rewards/rejected": 0.13305969536304474, | |
| "rewards/safe_rewards": 0.32091599702835083, | |
| "rewards/unsafe_rewards": 0.16919586062431335, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.867576860332048e-07, | |
| "logits/chosen": -2.4087131023406982, | |
| "logits/rejected": -2.1696860790252686, | |
| "logps/chosen": -182.63320922851562, | |
| "logps/rejected": -157.3323974609375, | |
| "loss": 39.9616, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.33797144889831543, | |
| "rewards/margins": 0.2170281708240509, | |
| "rewards/rejected": 0.12094320356845856, | |
| "rewards/safe_rewards": 0.7084277868270874, | |
| "rewards/unsafe_rewards": -0.0324850007891655, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.85207460196526e-07, | |
| "logits/chosen": -2.3588593006134033, | |
| "logits/rejected": -2.1359121799468994, | |
| "logps/chosen": -201.29721069335938, | |
| "logps/rejected": -180.4462432861328, | |
| "loss": 18.4967, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.20872633159160614, | |
| "rewards/margins": -0.07106774300336838, | |
| "rewards/rejected": -0.13765858113765717, | |
| "rewards/safe_rewards": -0.24322757124900818, | |
| "rewards/unsafe_rewards": -0.1742250919342041, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.835741985330259e-07, | |
| "logits/chosen": -2.393688678741455, | |
| "logits/rejected": -2.1949095726013184, | |
| "logps/chosen": -196.72280883789062, | |
| "logps/rejected": -164.93276977539062, | |
| "loss": 13.0753, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": -0.13964949548244476, | |
| "rewards/margins": 0.004895883612334728, | |
| "rewards/rejected": -0.14454536139965057, | |
| "rewards/safe_rewards": -0.14425238966941833, | |
| "rewards/unsafe_rewards": -0.1350466012954712, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.818584776367992e-07, | |
| "logits/chosen": -2.348188638687134, | |
| "logits/rejected": -2.183293342590332, | |
| "logps/chosen": -207.3245086669922, | |
| "logps/rejected": -185.33078002929688, | |
| "loss": 405.7585, | |
| "rewards/accuracies": 0.4781250059604645, | |
| "rewards/chosen": 0.9341610074043274, | |
| "rewards/margins": -0.25985628366470337, | |
| "rewards/rejected": 1.1940172910690308, | |
| "rewards/safe_rewards": 1.5165033340454102, | |
| "rewards/unsafe_rewards": 0.3518185615539551, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.800609032127122e-07, | |
| "logits/chosen": -2.362936496734619, | |
| "logits/rejected": -2.117405652999878, | |
| "logps/chosen": -205.0863037109375, | |
| "logps/rejected": -173.82562255859375, | |
| "loss": 250.8796, | |
| "rewards/accuracies": 0.46562498807907104, | |
| "rewards/chosen": 0.8158755302429199, | |
| "rewards/margins": 0.04819601774215698, | |
| "rewards/rejected": 0.7676795721054077, | |
| "rewards/safe_rewards": 0.8238789439201355, | |
| "rewards/unsafe_rewards": 0.8078721761703491, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.78182109862569e-07, | |
| "logits/chosen": -2.334447145462036, | |
| "logits/rejected": -2.1603846549987793, | |
| "logps/chosen": -193.15878295898438, | |
| "logps/rejected": -169.64031982421875, | |
| "loss": 43.271, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": 1.1323336362838745, | |
| "rewards/margins": -0.2824760377407074, | |
| "rewards/rejected": 1.4148097038269043, | |
| "rewards/safe_rewards": 1.1305078268051147, | |
| "rewards/unsafe_rewards": 1.1341596841812134, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.7622276086107677e-07, | |
| "logits/chosen": -2.4567148685455322, | |
| "logits/rejected": -2.2268338203430176, | |
| "logps/chosen": -221.8797149658203, | |
| "logps/rejected": -183.58682250976562, | |
| "loss": 170.0915, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 1.5294605493545532, | |
| "rewards/margins": -0.26579660177230835, | |
| "rewards/rejected": 1.7952572107315063, | |
| "rewards/safe_rewards": 1.6476377248764038, | |
| "rewards/unsafe_rewards": 1.4112837314605713, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.741835479216879e-07, | |
| "logits/chosen": -2.4018983840942383, | |
| "logits/rejected": -2.1745998859405518, | |
| "logps/chosen": -224.1997833251953, | |
| "logps/rejected": -202.8693084716797, | |
| "loss": 318.6482, | |
| "rewards/accuracies": 0.503125011920929, | |
| "rewards/chosen": 1.9682689905166626, | |
| "rewards/margins": 1.3845123052597046, | |
| "rewards/rejected": 0.5837565660476685, | |
| "rewards/safe_rewards": 1.9361345767974854, | |
| "rewards/unsafe_rewards": 2.0004029273986816, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.720651909524036e-07, | |
| "logits/chosen": -2.368582248687744, | |
| "logits/rejected": -2.1598029136657715, | |
| "logps/chosen": -199.04641723632812, | |
| "logps/rejected": -171.59878540039062, | |
| "loss": 20.6844, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": 0.34873148798942566, | |
| "rewards/margins": -0.22367699444293976, | |
| "rewards/rejected": 0.5724084973335266, | |
| "rewards/safe_rewards": 0.4507713317871094, | |
| "rewards/unsafe_rewards": 0.24669162929058075, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.698684378016222e-07, | |
| "logits/chosen": -2.4238266944885254, | |
| "logits/rejected": -2.1877074241638184, | |
| "logps/chosen": -206.9587860107422, | |
| "logps/rejected": -166.5978546142578, | |
| "loss": 36.0619, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.48088520765304565, | |
| "rewards/margins": -0.556584894657135, | |
| "rewards/rejected": 0.07569964975118637, | |
| "rewards/safe_rewards": -0.8808043599128723, | |
| "rewards/unsafe_rewards": -0.08096615970134735, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.675940639941256e-07, | |
| "logits/chosen": -2.381782054901123, | |
| "logits/rejected": -2.2072319984436035, | |
| "logps/chosen": -202.72836303710938, | |
| "logps/rejected": -178.13565063476562, | |
| "loss": 19.0221, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": 0.561229407787323, | |
| "rewards/margins": 0.22027714550495148, | |
| "rewards/rejected": 0.3409522473812103, | |
| "rewards/safe_rewards": 0.481137752532959, | |
| "rewards/unsafe_rewards": 0.641321063041687, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.6524287245729286e-07, | |
| "logits/chosen": -2.3484253883361816, | |
| "logits/rejected": -2.134340524673462, | |
| "logps/chosen": -198.06240844726562, | |
| "logps/rejected": -166.09368896484375, | |
| "loss": 26.6374, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": 0.22380805015563965, | |
| "rewards/margins": -0.03325925022363663, | |
| "rewards/rejected": 0.2570672631263733, | |
| "rewards/safe_rewards": 0.1073969230055809, | |
| "rewards/unsafe_rewards": 0.3402191996574402, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.628156932376418e-07, | |
| "logits/chosen": -2.3849387168884277, | |
| "logits/rejected": -2.1502578258514404, | |
| "logps/chosen": -202.72006225585938, | |
| "logps/rejected": -165.7488555908203, | |
| "loss": 163.8104, | |
| "rewards/accuracies": 0.5093749761581421, | |
| "rewards/chosen": 0.22546739876270294, | |
| "rewards/margins": 0.1902145892381668, | |
| "rewards/rejected": 0.03525285795331001, | |
| "rewards/safe_rewards": -0.18428334593772888, | |
| "rewards/unsafe_rewards": 0.6352182030677795, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.603133832077953e-07, | |
| "logits/chosen": -2.3536932468414307, | |
| "logits/rejected": -2.1680946350097656, | |
| "logps/chosen": -209.31454467773438, | |
| "logps/rejected": -199.3543701171875, | |
| "loss": 75.0922, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": 0.6691935658454895, | |
| "rewards/margins": 0.05490832403302193, | |
| "rewards/rejected": 0.6142852902412415, | |
| "rewards/safe_rewards": 0.21514494717121124, | |
| "rewards/unsafe_rewards": 1.1232421398162842, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.5773682576397776e-07, | |
| "logits/chosen": -2.360821008682251, | |
| "logits/rejected": -2.1603407859802246, | |
| "logps/chosen": -201.4778594970703, | |
| "logps/rejected": -169.81484985351562, | |
| "loss": 131.6857, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": 0.10828091204166412, | |
| "rewards/margins": -0.5544548034667969, | |
| "rewards/rejected": 0.662735641002655, | |
| "rewards/safe_rewards": -0.3237845301628113, | |
| "rewards/unsafe_rewards": 0.5403462648391724, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_logits/chosen": -2.056485414505005, | |
| "eval_logits/rejected": -1.803229808807373, | |
| "eval_logps/chosen": -130.9681396484375, | |
| "eval_logps/rejected": -92.36480712890625, | |
| "eval_loss": 0.8894476294517517, | |
| "eval_rewards/accuracies": 0.45462244749069214, | |
| "eval_rewards/chosen": -0.10225697606801987, | |
| "eval_rewards/margins": -0.08933582156896591, | |
| "eval_rewards/rejected": -0.012921147979795933, | |
| "eval_rewards/safe_rewards": -0.10428992658853531, | |
| "eval_rewards/unsafe_rewards": -0.10168781876564026, | |
| "eval_runtime": 2237.5747, | |
| "eval_samples_per_second": 14.768, | |
| "eval_steps_per_second": 0.923, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.5508693051414774e-07, | |
| "logits/chosen": -2.3876683712005615, | |
| "logits/rejected": -2.2101075649261475, | |
| "logps/chosen": -197.6197509765625, | |
| "logps/rejected": -179.2535858154297, | |
| "loss": 10.2996, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.15894845128059387, | |
| "rewards/margins": -0.012749219313263893, | |
| "rewards/rejected": 0.17169766128063202, | |
| "rewards/safe_rewards": 0.22355195879936218, | |
| "rewards/unsafe_rewards": 0.09434493631124496, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.52364632956877e-07, | |
| "logits/chosen": -2.3795700073242188, | |
| "logits/rejected": -2.167722225189209, | |
| "logps/chosen": -209.80477905273438, | |
| "logps/rejected": -170.1284942626953, | |
| "loss": 102.0474, | |
| "rewards/accuracies": 0.47187501192092896, | |
| "rewards/chosen": -0.054634951055049896, | |
| "rewards/margins": -0.5382941961288452, | |
| "rewards/rejected": 0.48365920782089233, | |
| "rewards/safe_rewards": 0.07545175403356552, | |
| "rewards/unsafe_rewards": -0.1847216635942459, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.4957089415108895e-07, | |
| "logits/chosen": -2.3528215885162354, | |
| "logits/rejected": -2.1418814659118652, | |
| "logps/chosen": -187.97207641601562, | |
| "logps/rejected": -165.0076446533203, | |
| "loss": 120.2137, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.6140010952949524, | |
| "rewards/margins": 0.31187087297439575, | |
| "rewards/rejected": 0.30213022232055664, | |
| "rewards/safe_rewards": 0.45020800828933716, | |
| "rewards/unsafe_rewards": 0.7777942419052124, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.467067003767745e-07, | |
| "logits/chosen": -2.441636800765991, | |
| "logits/rejected": -2.219637870788574, | |
| "logps/chosen": -215.01718139648438, | |
| "logps/rejected": -178.1621856689453, | |
| "loss": 31.3751, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": 0.5192718505859375, | |
| "rewards/margins": 0.13771791756153107, | |
| "rewards/rejected": 0.3815539479255676, | |
| "rewards/safe_rewards": 0.3229585587978363, | |
| "rewards/unsafe_rewards": 0.7155852317810059, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.437730627868027e-07, | |
| "logits/chosen": -2.378955602645874, | |
| "logits/rejected": -2.138523578643799, | |
| "logps/chosen": -181.02993774414062, | |
| "logps/rejected": -161.35678100585938, | |
| "loss": 48.7052, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": 0.13924112915992737, | |
| "rewards/margins": 0.37750715017318726, | |
| "rewards/rejected": -0.23826603591442108, | |
| "rewards/safe_rewards": 0.4204103946685791, | |
| "rewards/unsafe_rewards": -0.14192816615104675, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.4077101704995163e-07, | |
| "logits/chosen": -2.4157960414886475, | |
| "logits/rejected": -2.1959304809570312, | |
| "logps/chosen": -204.2389373779297, | |
| "logps/rejected": -188.56707763671875, | |
| "loss": 23.5436, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.36682039499282837, | |
| "rewards/margins": -0.1311464011669159, | |
| "rewards/rejected": -0.23567399382591248, | |
| "rewards/safe_rewards": -0.4130277633666992, | |
| "rewards/unsafe_rewards": -0.3206130862236023, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.3770162298528356e-07, | |
| "logits/chosen": -2.4378573894500732, | |
| "logits/rejected": -2.243499994277954, | |
| "logps/chosen": -201.71572875976562, | |
| "logps/rejected": -169.5461883544922, | |
| "loss": 48.0924, | |
| "rewards/accuracies": 0.565625011920929, | |
| "rewards/chosen": 0.21430592238903046, | |
| "rewards/margins": 0.6119717359542847, | |
| "rewards/rejected": -0.397665798664093, | |
| "rewards/safe_rewards": -0.27201324701309204, | |
| "rewards/unsafe_rewards": 0.7006251811981201, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.3456596418799476e-07, | |
| "logits/chosen": -2.383977174758911, | |
| "logits/rejected": -2.204479694366455, | |
| "logps/chosen": -208.63818359375, | |
| "logps/rejected": -172.74533081054688, | |
| "loss": 40.887, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": 0.31667083501815796, | |
| "rewards/margins": 0.20149526000022888, | |
| "rewards/rejected": 0.11517556756734848, | |
| "rewards/safe_rewards": 0.08531586080789566, | |
| "rewards/unsafe_rewards": 0.5480257868766785, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.313651476468715e-07, | |
| "logits/chosen": -2.452789783477783, | |
| "logits/rejected": -2.2367706298828125, | |
| "logps/chosen": -206.00991821289062, | |
| "logps/rejected": -181.455810546875, | |
| "loss": 17.7462, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.025053849443793297, | |
| "rewards/margins": -0.13409826159477234, | |
| "rewards/rejected": 0.1591521054506302, | |
| "rewards/safe_rewards": -0.05954737588763237, | |
| "rewards/unsafe_rewards": 0.10965506732463837, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.2810030335348693e-07, | |
| "logits/chosen": -2.4099035263061523, | |
| "logits/rejected": -2.218843936920166, | |
| "logps/chosen": -218.79177856445312, | |
| "logps/rejected": -168.67431640625, | |
| "loss": 74.9431, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.23495905101299286, | |
| "rewards/margins": -0.6415296196937561, | |
| "rewards/rejected": 0.40657052397727966, | |
| "rewards/safe_rewards": -0.5243301391601562, | |
| "rewards/unsafe_rewards": 0.05441205948591232, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.2477258390327806e-07, | |
| "logits/chosen": -2.4378225803375244, | |
| "logits/rejected": -2.2049014568328857, | |
| "logps/chosen": -190.23843383789062, | |
| "logps/rejected": -167.7356719970703, | |
| "loss": 24.3453, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -0.24785485863685608, | |
| "rewards/margins": -0.3185795247554779, | |
| "rewards/rejected": 0.07072468847036362, | |
| "rewards/safe_rewards": -0.1916726529598236, | |
| "rewards/unsafe_rewards": -0.30403703451156616, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.2138316408864197e-07, | |
| "logits/chosen": -2.4828572273254395, | |
| "logits/rejected": -2.251974105834961, | |
| "logps/chosen": -195.9208221435547, | |
| "logps/rejected": -162.9341278076172, | |
| "loss": 47.0644, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": 0.2951027750968933, | |
| "rewards/margins": 0.2895166277885437, | |
| "rewards/rejected": 0.005586123559623957, | |
| "rewards/safe_rewards": 0.2697201073169708, | |
| "rewards/unsafe_rewards": 0.3204854130744934, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.179332404841962e-07, | |
| "logits/chosen": -2.4540035724639893, | |
| "logits/rejected": -2.223843812942505, | |
| "logps/chosen": -208.46463012695312, | |
| "logps/rejected": -176.60848999023438, | |
| "loss": 25.2961, | |
| "rewards/accuracies": 0.46562498807907104, | |
| "rewards/chosen": 0.14698375761508942, | |
| "rewards/margins": 0.1325828731060028, | |
| "rewards/rejected": 0.014400847256183624, | |
| "rewards/safe_rewards": 0.04335422068834305, | |
| "rewards/unsafe_rewards": 0.250613272190094, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.1442403102434954e-07, | |
| "logits/chosen": -2.4651191234588623, | |
| "logits/rejected": -2.252150535583496, | |
| "logps/chosen": -212.79736328125, | |
| "logps/rejected": -179.38711547851562, | |
| "loss": 117.4084, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.48046717047691345, | |
| "rewards/margins": -0.18567809462547302, | |
| "rewards/rejected": -0.29478907585144043, | |
| "rewards/safe_rewards": -0.6295033693313599, | |
| "rewards/unsafe_rewards": -0.3314310312271118, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.108567745733318e-07, | |
| "logits/chosen": -2.447937488555908, | |
| "logits/rejected": -2.201697826385498, | |
| "logps/chosen": -184.49168395996094, | |
| "logps/rejected": -166.9139404296875, | |
| "loss": 10.7524, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.005866925232112408, | |
| "rewards/margins": 0.17292609810829163, | |
| "rewards/rejected": -0.1787930279970169, | |
| "rewards/safe_rewards": 0.03122568130493164, | |
| "rewards/unsafe_rewards": -0.042959537357091904, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.0723273048783426e-07, | |
| "logits/chosen": -2.44038462638855, | |
| "logits/rejected": -2.2175660133361816, | |
| "logps/chosen": -211.3206787109375, | |
| "logps/rejected": -165.2122802734375, | |
| "loss": 81.9566, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": 0.5619795918464661, | |
| "rewards/margins": 0.545897364616394, | |
| "rewards/rejected": 0.016082104295492172, | |
| "rewards/safe_rewards": 1.0618271827697754, | |
| "rewards/unsafe_rewards": 0.06213190406560898, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.0355317817241697e-07, | |
| "logits/chosen": -2.3970015048980713, | |
| "logits/rejected": -2.163048267364502, | |
| "logps/chosen": -229.952880859375, | |
| "logps/rejected": -176.55599975585938, | |
| "loss": 26.2558, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": 0.38434115052223206, | |
| "rewards/margins": 0.2878434658050537, | |
| "rewards/rejected": 0.09649765491485596, | |
| "rewards/safe_rewards": 0.5011194944381714, | |
| "rewards/unsafe_rewards": 0.26756277680397034, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.998194166278367e-07, | |
| "logits/chosen": -2.4422953128814697, | |
| "logits/rejected": -2.2152860164642334, | |
| "logps/chosen": -193.12109375, | |
| "logps/rejected": -156.7648162841797, | |
| "loss": 157.1721, | |
| "rewards/accuracies": 0.4593749940395355, | |
| "rewards/chosen": -0.3413035273551941, | |
| "rewards/margins": -0.26979130506515503, | |
| "rewards/rejected": -0.07151220738887787, | |
| "rewards/safe_rewards": -0.49346867203712463, | |
| "rewards/unsafe_rewards": -0.18913838267326355, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.9603276399245855e-07, | |
| "logits/chosen": -2.4512076377868652, | |
| "logits/rejected": -2.217556953430176, | |
| "logps/chosen": -212.5731658935547, | |
| "logps/rejected": -172.98239135742188, | |
| "loss": 140.5213, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.045460961759090424, | |
| "rewards/margins": 0.4976281523704529, | |
| "rewards/rejected": -0.5430891513824463, | |
| "rewards/safe_rewards": 0.1586223542690277, | |
| "rewards/unsafe_rewards": -0.24954433739185333, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.9219455707691e-07, | |
| "logits/chosen": -2.443801164627075, | |
| "logits/rejected": -2.217026710510254, | |
| "logps/chosen": -223.50064086914062, | |
| "logps/rejected": -188.3572998046875, | |
| "loss": 239.7127, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -1.1688416004180908, | |
| "rewards/margins": -0.6104832291603088, | |
| "rewards/rejected": -0.5583583116531372, | |
| "rewards/safe_rewards": -0.5376420021057129, | |
| "rewards/unsafe_rewards": -1.8000411987304688, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.883061508921439e-07, | |
| "logits/chosen": -2.4577882289886475, | |
| "logits/rejected": -2.289802074432373, | |
| "logps/chosen": -199.79066467285156, | |
| "logps/rejected": -191.25059509277344, | |
| "loss": 127.1414, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.8074787855148315, | |
| "rewards/margins": -0.5068421363830566, | |
| "rewards/rejected": -0.30063679814338684, | |
| "rewards/safe_rewards": -1.0235812664031982, | |
| "rewards/unsafe_rewards": -0.5913764238357544, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.8436891817107555e-07, | |
| "logits/chosen": -2.384692668914795, | |
| "logits/rejected": -2.2363414764404297, | |
| "logps/chosen": -192.9431915283203, | |
| "logps/rejected": -173.0110626220703, | |
| "loss": 88.3357, | |
| "rewards/accuracies": 0.4781250059604645, | |
| "rewards/chosen": -0.845304012298584, | |
| "rewards/margins": 0.22771115601062775, | |
| "rewards/rejected": -1.0730152130126953, | |
| "rewards/safe_rewards": -0.7142607569694519, | |
| "rewards/unsafe_rewards": -0.9763473272323608, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.8038424888396414e-07, | |
| "logits/chosen": -2.4334444999694824, | |
| "logits/rejected": -2.2202000617980957, | |
| "logps/chosen": -190.13265991210938, | |
| "logps/rejected": -173.72535705566406, | |
| "loss": 46.5741, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.9059289693832397, | |
| "rewards/margins": 0.07719539105892181, | |
| "rewards/rejected": -0.9831243753433228, | |
| "rewards/safe_rewards": -1.5265599489212036, | |
| "rewards/unsafe_rewards": -0.2852979004383087, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.763535497477079e-07, | |
| "logits/chosen": -2.428952693939209, | |
| "logits/rejected": -2.205458641052246, | |
| "logps/chosen": -203.35873413085938, | |
| "logps/rejected": -178.9982452392578, | |
| "loss": 30.0399, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": 0.06326188147068024, | |
| "rewards/margins": 0.4762391149997711, | |
| "rewards/rejected": -0.4129772186279297, | |
| "rewards/safe_rewards": 0.01683131232857704, | |
| "rewards/unsafe_rewards": 0.10969245433807373, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.7227824372922795e-07, | |
| "logits/chosen": -2.4341301918029785, | |
| "logits/rejected": -2.2008628845214844, | |
| "logps/chosen": -189.18417358398438, | |
| "logps/rejected": -167.0784454345703, | |
| "loss": 12.3092, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.09529106318950653, | |
| "rewards/margins": 0.02994244359433651, | |
| "rewards/rejected": 0.06534863263368607, | |
| "rewards/safe_rewards": 0.103404700756073, | |
| "rewards/unsafe_rewards": 0.08717743307352066, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.681597695431148e-07, | |
| "logits/chosen": -2.397660732269287, | |
| "logits/rejected": -2.248548984527588, | |
| "logps/chosen": -201.36961364746094, | |
| "logps/rejected": -183.10923767089844, | |
| "loss": 44.1826, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -0.05803655833005905, | |
| "rewards/margins": 0.0876794308423996, | |
| "rewards/rejected": -0.14571599662303925, | |
| "rewards/safe_rewards": -0.19255781173706055, | |
| "rewards/unsafe_rewards": 0.07648466527462006, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.639995811437159e-07, | |
| "logits/chosen": -2.3755042552948, | |
| "logits/rejected": -2.191373348236084, | |
| "logps/chosen": -197.1927032470703, | |
| "logps/rejected": -179.4755859375, | |
| "loss": 154.7574, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": 0.14616283774375916, | |
| "rewards/margins": 0.41009521484375, | |
| "rewards/rejected": -0.26393240690231323, | |
| "rewards/safe_rewards": 0.3775586485862732, | |
| "rewards/unsafe_rewards": -0.08523297309875488, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.597991472118426e-07, | |
| "logits/chosen": -2.4273521900177, | |
| "logits/rejected": -2.192534923553467, | |
| "logps/chosen": -206.8874053955078, | |
| "logps/rejected": -176.24118041992188, | |
| "loss": 36.5319, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": -0.164406418800354, | |
| "rewards/margins": -0.1354350596666336, | |
| "rewards/rejected": -0.028971344232559204, | |
| "rewards/safe_rewards": 0.2160978764295578, | |
| "rewards/unsafe_rewards": -0.5449106097221375, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.5555995063627836e-07, | |
| "logits/chosen": -2.415065050125122, | |
| "logits/rejected": -2.194133758544922, | |
| "logps/chosen": -222.50820922851562, | |
| "logps/rejected": -191.37088012695312, | |
| "loss": 16.1129, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": 0.1634823977947235, | |
| "rewards/margins": 0.29501140117645264, | |
| "rewards/rejected": -0.1315290331840515, | |
| "rewards/safe_rewards": -0.023002928122878075, | |
| "rewards/unsafe_rewards": 0.34996774792671204, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.512834879902715e-07, | |
| "logits/chosen": -2.446582794189453, | |
| "logits/rejected": -2.2151386737823486, | |
| "logps/chosen": -193.52993774414062, | |
| "logps/rejected": -169.22207641601562, | |
| "loss": 17.2298, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.15118324756622314, | |
| "rewards/margins": 0.320087730884552, | |
| "rewards/rejected": -0.16890448331832886, | |
| "rewards/safe_rewards": 0.15818454325199127, | |
| "rewards/unsafe_rewards": 0.14418195188045502, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.4697126900319616e-07, | |
| "logits/chosen": -2.4158897399902344, | |
| "logits/rejected": -2.180227756500244, | |
| "logps/chosen": -200.93173217773438, | |
| "logps/rejected": -167.99073791503906, | |
| "loss": 22.7375, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": 0.10535750538110733, | |
| "rewards/margins": -0.006962819490581751, | |
| "rewards/rejected": 0.11232032626867294, | |
| "rewards/safe_rewards": -0.18741589784622192, | |
| "rewards/unsafe_rewards": 0.3981309235095978, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.426248160275693e-07, | |
| "logits/chosen": -2.4130988121032715, | |
| "logits/rejected": -2.223747730255127, | |
| "logps/chosen": -196.2846221923828, | |
| "logps/rejected": -177.1783447265625, | |
| "loss": 62.6098, | |
| "rewards/accuracies": 0.47187501192092896, | |
| "rewards/chosen": 0.08924231678247452, | |
| "rewards/margins": -0.5832756757736206, | |
| "rewards/rejected": 0.6725180745124817, | |
| "rewards/safe_rewards": 0.08092136681079865, | |
| "rewards/unsafe_rewards": 0.09756331145763397, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.3824566350161094e-07, | |
| "logits/chosen": -2.4248764514923096, | |
| "logits/rejected": -2.1799604892730713, | |
| "logps/chosen": -211.0237274169922, | |
| "logps/rejected": -165.1766815185547, | |
| "loss": 8.7437, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": 0.5879140496253967, | |
| "rewards/margins": 0.23111946880817413, | |
| "rewards/rejected": 0.356794536113739, | |
| "rewards/safe_rewards": 0.5438351631164551, | |
| "rewards/unsafe_rewards": 0.6319928765296936, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.338353574075381e-07, | |
| "logits/chosen": -2.3919012546539307, | |
| "logits/rejected": -2.212056875228882, | |
| "logps/chosen": -188.0956268310547, | |
| "logps/rejected": -166.2266387939453, | |
| "loss": 23.4515, | |
| "rewards/accuracies": 0.503125011920929, | |
| "rewards/chosen": 0.4208035469055176, | |
| "rewards/margins": 0.025497043505311012, | |
| "rewards/rejected": 0.3953064978122711, | |
| "rewards/safe_rewards": 0.5599286556243896, | |
| "rewards/unsafe_rewards": 0.2816784679889679, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.2939545472578314e-07, | |
| "logits/chosen": -2.4613280296325684, | |
| "logits/rejected": -2.1779792308807373, | |
| "logps/chosen": -220.7722625732422, | |
| "logps/rejected": -177.66567993164062, | |
| "loss": 71.1367, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.6730166077613831, | |
| "rewards/margins": 0.4062492251396179, | |
| "rewards/rejected": 0.26676732301712036, | |
| "rewards/safe_rewards": 0.1429261863231659, | |
| "rewards/unsafe_rewards": 1.2031069993972778, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.2492752288532916e-07, | |
| "logits/chosen": -2.4267163276672363, | |
| "logits/rejected": -2.2031116485595703, | |
| "logps/chosen": -192.3984832763672, | |
| "logps/rejected": -171.2382354736328, | |
| "loss": 46.0145, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": 0.1684725284576416, | |
| "rewards/margins": -0.08331739902496338, | |
| "rewards/rejected": 0.25178998708724976, | |
| "rewards/safe_rewards": 0.12440772354602814, | |
| "rewards/unsafe_rewards": 0.21253737807273865, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 3.204331392103574e-07, | |
| "logits/chosen": -2.483734369277954, | |
| "logits/rejected": -2.2113869190216064, | |
| "logps/chosen": -211.577880859375, | |
| "logps/rejected": -163.4304656982422, | |
| "loss": 155.5441, | |
| "rewards/accuracies": 0.5406249761581421, | |
| "rewards/chosen": 0.19171550869941711, | |
| "rewards/margins": -0.2531381845474243, | |
| "rewards/rejected": 0.44485369324684143, | |
| "rewards/safe_rewards": 0.10490121692419052, | |
| "rewards/unsafe_rewards": 0.2785297632217407, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 3.159138903634006e-07, | |
| "logits/chosen": -2.409116744995117, | |
| "logits/rejected": -2.2290921211242676, | |
| "logps/chosen": -203.94369506835938, | |
| "logps/rejected": -173.5029754638672, | |
| "loss": 9.3153, | |
| "rewards/accuracies": 0.5406249761581421, | |
| "rewards/chosen": 0.4058583676815033, | |
| "rewards/margins": 0.18885207176208496, | |
| "rewards/rejected": 0.21700629591941833, | |
| "rewards/safe_rewards": 0.3220987915992737, | |
| "rewards/unsafe_rewards": 0.48961788415908813, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.1137137178519977e-07, | |
| "logits/chosen": -2.4068942070007324, | |
| "logits/rejected": -2.212474822998047, | |
| "logps/chosen": -184.1978759765625, | |
| "logps/rejected": -157.02920532226562, | |
| "loss": 47.3581, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.041205547749996185, | |
| "rewards/margins": -0.1057499423623085, | |
| "rewards/rejected": 0.06454440206289291, | |
| "rewards/safe_rewards": -0.18809974193572998, | |
| "rewards/unsafe_rewards": 0.10568861663341522, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.068071871314626e-07, | |
| "logits/chosen": -2.3744447231292725, | |
| "logits/rejected": -2.1711204051971436, | |
| "logps/chosen": -193.363525390625, | |
| "logps/rejected": -157.98092651367188, | |
| "loss": 36.4272, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": 0.1274958997964859, | |
| "rewards/margins": -0.06520196795463562, | |
| "rewards/rejected": 0.19269786775112152, | |
| "rewards/safe_rewards": 0.27050352096557617, | |
| "rewards/unsafe_rewards": -0.015511776320636272, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 3.022229477067205e-07, | |
| "logits/chosen": -2.4298009872436523, | |
| "logits/rejected": -2.2137274742126465, | |
| "logps/chosen": -212.06454467773438, | |
| "logps/rejected": -162.7147216796875, | |
| "loss": 22.3251, | |
| "rewards/accuracies": 0.5406249761581421, | |
| "rewards/chosen": 0.2818406820297241, | |
| "rewards/margins": 0.29059693217277527, | |
| "rewards/rejected": -0.008756252937018871, | |
| "rewards/safe_rewards": 0.12103135883808136, | |
| "rewards/unsafe_rewards": 0.4426499903202057, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.976202718954869e-07, | |
| "logits/chosen": -2.4414241313934326, | |
| "logits/rejected": -2.214113235473633, | |
| "logps/chosen": -208.3417510986328, | |
| "logps/rejected": -185.30526733398438, | |
| "loss": 15.9322, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": 0.002296045422554016, | |
| "rewards/margins": 0.06845332682132721, | |
| "rewards/rejected": -0.0661572739481926, | |
| "rewards/safe_rewards": 0.11791107803583145, | |
| "rewards/unsafe_rewards": -0.11331899464130402, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.930007845909146e-07, | |
| "logits/chosen": -2.465981960296631, | |
| "logits/rejected": -2.2979178428649902, | |
| "logps/chosen": -220.63400268554688, | |
| "logps/rejected": -194.15982055664062, | |
| "loss": 20.6631, | |
| "rewards/accuracies": 0.44062501192092896, | |
| "rewards/chosen": 0.008412945084273815, | |
| "rewards/margins": -0.06071774289011955, | |
| "rewards/rejected": 0.06913068145513535, | |
| "rewards/safe_rewards": 0.15175995230674744, | |
| "rewards/unsafe_rewards": -0.13493406772613525, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.8836611662115634e-07, | |
| "logits/chosen": -2.411681890487671, | |
| "logits/rejected": -2.184065818786621, | |
| "logps/chosen": -201.34774780273438, | |
| "logps/rejected": -158.77896118164062, | |
| "loss": 53.4563, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": 0.30983632802963257, | |
| "rewards/margins": 0.02700033411383629, | |
| "rewards/rejected": 0.282835990190506, | |
| "rewards/safe_rewards": 0.16020536422729492, | |
| "rewards/unsafe_rewards": 0.4594673216342926, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.8371790417362986e-07, | |
| "logits/chosen": -2.4363036155700684, | |
| "logits/rejected": -2.2508435249328613, | |
| "logps/chosen": -194.97052001953125, | |
| "logps/rejected": -184.87435913085938, | |
| "loss": 15.1437, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.3793017268180847, | |
| "rewards/margins": 0.050546444952487946, | |
| "rewards/rejected": 0.328755259513855, | |
| "rewards/safe_rewards": 0.453242689371109, | |
| "rewards/unsafe_rewards": 0.30536073446273804, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.7905778821739056e-07, | |
| "logits/chosen": -2.430182456970215, | |
| "logits/rejected": -2.181687116622925, | |
| "logps/chosen": -207.5760955810547, | |
| "logps/rejected": -161.82400512695312, | |
| "loss": 36.165, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": 0.2813587188720703, | |
| "rewards/margins": -0.009190035052597523, | |
| "rewards/rejected": 0.29054874181747437, | |
| "rewards/safe_rewards": 0.5300852060317993, | |
| "rewards/unsafe_rewards": 0.03263214975595474, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.74387413923817e-07, | |
| "logits/chosen": -2.3779215812683105, | |
| "logits/rejected": -2.2126498222351074, | |
| "logps/chosen": -216.20980834960938, | |
| "logps/rejected": -191.72068786621094, | |
| "loss": 35.9574, | |
| "rewards/accuracies": 0.4781250059604645, | |
| "rewards/chosen": 0.40009957551956177, | |
| "rewards/margins": 0.11698710918426514, | |
| "rewards/rejected": 0.28311246633529663, | |
| "rewards/safe_rewards": 0.3544066548347473, | |
| "rewards/unsafe_rewards": 0.4457924962043762, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.69708430085812e-07, | |
| "logits/chosen": -2.442641496658325, | |
| "logits/rejected": -2.2196171283721924, | |
| "logps/chosen": -210.2590789794922, | |
| "logps/rejected": -178.38427734375, | |
| "loss": 143.7915, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.8005061149597168, | |
| "rewards/margins": 0.7907932996749878, | |
| "rewards/rejected": 0.009712839499115944, | |
| "rewards/safe_rewards": 1.2023097276687622, | |
| "rewards/unsafe_rewards": 0.39870262145996094, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.6502248853572504e-07, | |
| "logits/chosen": -2.397225856781006, | |
| "logits/rejected": -2.1839497089385986, | |
| "logps/chosen": -191.41046142578125, | |
| "logps/rejected": -162.9442901611328, | |
| "loss": 12.2808, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": 0.014359796419739723, | |
| "rewards/margins": -0.05102130025625229, | |
| "rewards/rejected": 0.06538109481334686, | |
| "rewards/safe_rewards": -0.18064935505390167, | |
| "rewards/unsafe_rewards": 0.20936894416809082, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.6033124356220325e-07, | |
| "logits/chosen": -2.364447593688965, | |
| "logits/rejected": -2.1461973190307617, | |
| "logps/chosen": -199.1238555908203, | |
| "logps/rejected": -159.5116729736328, | |
| "loss": 34.7958, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.03608076646924019, | |
| "rewards/margins": -0.22421510517597198, | |
| "rewards/rejected": 0.1881343126296997, | |
| "rewards/safe_rewards": 0.28278595209121704, | |
| "rewards/unsafe_rewards": -0.3549474775791168, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_logits/chosen": -2.0551209449768066, | |
| "eval_logits/rejected": -1.7989723682403564, | |
| "eval_logps/chosen": -130.9921875, | |
| "eval_logps/rejected": -92.4808578491211, | |
| "eval_loss": 0.7397361993789673, | |
| "eval_rewards/accuracies": 0.5028436779975891, | |
| "eval_rewards/chosen": -0.12634092569351196, | |
| "eval_rewards/margins": 0.00263192574493587, | |
| "eval_rewards/rejected": -0.1289728581905365, | |
| "eval_rewards/safe_rewards": -0.12365306168794632, | |
| "eval_rewards/unsafe_rewards": -0.1263761818408966, | |
| "eval_runtime": 1869.3277, | |
| "eval_samples_per_second": 17.677, | |
| "eval_steps_per_second": 1.105, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.55636351326173e-07, | |
| "logits/chosen": -2.4121344089508057, | |
| "logits/rejected": -2.2164716720581055, | |
| "logps/chosen": -214.9409637451172, | |
| "logps/rejected": -175.6654815673828, | |
| "loss": 72.6154, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.20922379195690155, | |
| "rewards/margins": -0.15886008739471436, | |
| "rewards/rejected": 0.3680838942527771, | |
| "rewards/safe_rewards": 0.6282423734664917, | |
| "rewards/unsafe_rewards": -0.20979471504688263, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.509394692761622e-07, | |
| "logits/chosen": -2.39310884475708, | |
| "logits/rejected": -2.1510488986968994, | |
| "logps/chosen": -218.1635284423828, | |
| "logps/rejected": -180.8001251220703, | |
| "loss": 79.5377, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.365112841129303, | |
| "rewards/margins": 0.28045016527175903, | |
| "rewards/rejected": 0.08466275036334991, | |
| "rewards/safe_rewards": 0.08056111633777618, | |
| "rewards/unsafe_rewards": 0.649664580821991, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.462422555631674e-07, | |
| "logits/chosen": -2.4212746620178223, | |
| "logits/rejected": -2.187579393386841, | |
| "logps/chosen": -197.0594024658203, | |
| "logps/rejected": -160.92257690429688, | |
| "loss": 30.6297, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": 0.516227126121521, | |
| "rewards/margins": 0.2513945698738098, | |
| "rewards/rejected": 0.2648325562477112, | |
| "rewards/safe_rewards": 0.3707699179649353, | |
| "rewards/unsafe_rewards": 0.6616843938827515, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.415463684552728e-07, | |
| "logits/chosen": -2.3526053428649902, | |
| "logits/rejected": -2.168795585632324, | |
| "logps/chosen": -187.2362518310547, | |
| "logps/rejected": -158.90509033203125, | |
| "loss": 16.6677, | |
| "rewards/accuracies": 0.5093749761581421, | |
| "rewards/chosen": 0.2424931526184082, | |
| "rewards/margins": -0.007419240660965443, | |
| "rewards/rejected": 0.2499123513698578, | |
| "rewards/safe_rewards": 0.3042396008968353, | |
| "rewards/unsafe_rewards": 0.1807466745376587, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.3685346575222807e-07, | |
| "logits/chosen": -2.388552188873291, | |
| "logits/rejected": -2.140934467315674, | |
| "logps/chosen": -206.6807098388672, | |
| "logps/rejected": -170.2689666748047, | |
| "loss": 9.8385, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": 0.22210577130317688, | |
| "rewards/margins": -0.04671960324048996, | |
| "rewards/rejected": 0.26882538199424744, | |
| "rewards/safe_rewards": 0.3220168948173523, | |
| "rewards/unsafe_rewards": 0.12219462543725967, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.321652042001919e-07, | |
| "logits/chosen": -2.390388011932373, | |
| "logits/rejected": -2.10972261428833, | |
| "logps/chosen": -209.7392120361328, | |
| "logps/rejected": -183.13662719726562, | |
| "loss": 9.9643, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": 0.16931462287902832, | |
| "rewards/margins": 0.08294131606817245, | |
| "rewards/rejected": 0.08637328445911407, | |
| "rewards/safe_rewards": 0.11830408871173859, | |
| "rewards/unsafe_rewards": 0.22032511234283447, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.2748323890684662e-07, | |
| "logits/chosen": -2.3839309215545654, | |
| "logits/rejected": -2.1623384952545166, | |
| "logps/chosen": -198.49668884277344, | |
| "logps/rejected": -169.58737182617188, | |
| "loss": 11.8899, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": 0.18948496878147125, | |
| "rewards/margins": -0.021000146865844727, | |
| "rewards/rejected": 0.2104850709438324, | |
| "rewards/safe_rewards": 0.07809984683990479, | |
| "rewards/unsafe_rewards": 0.3008700907230377, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.2280922275709213e-07, | |
| "logits/chosen": -2.402510166168213, | |
| "logits/rejected": -2.1689133644104004, | |
| "logps/chosen": -204.17782592773438, | |
| "logps/rejected": -179.0993194580078, | |
| "loss": 46.4965, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.004160255193710327, | |
| "rewards/margins": 0.15431872010231018, | |
| "rewards/rejected": -0.1584789752960205, | |
| "rewards/safe_rewards": -0.011111170053482056, | |
| "rewards/unsafe_rewards": 0.0027906596660614014, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.1814480582952375e-07, | |
| "logits/chosen": -2.410515308380127, | |
| "logits/rejected": -2.184720993041992, | |
| "logps/chosen": -203.24267578125, | |
| "logps/rejected": -181.4256134033203, | |
| "loss": 102.4097, | |
| "rewards/accuracies": 0.503125011920929, | |
| "rewards/chosen": 0.45176035165786743, | |
| "rewards/margins": 0.40241655707359314, | |
| "rewards/rejected": 0.04934380576014519, | |
| "rewards/safe_rewards": 0.11663278192281723, | |
| "rewards/unsafe_rewards": 0.7868879437446594, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.1349163481390187e-07, | |
| "logits/chosen": -2.397282600402832, | |
| "logits/rejected": -2.194654703140259, | |
| "logps/chosen": -193.00746154785156, | |
| "logps/rejected": -171.80690002441406, | |
| "loss": 7.6309, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": 0.22561688721179962, | |
| "rewards/margins": 0.030238542705774307, | |
| "rewards/rejected": 0.1953783482313156, | |
| "rewards/safe_rewards": 0.10761779546737671, | |
| "rewards/unsafe_rewards": 0.34361597895622253, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0885135242981647e-07, | |
| "logits/chosen": -2.398287057876587, | |
| "logits/rejected": -2.1465389728546143, | |
| "logps/chosen": -213.0477752685547, | |
| "logps/rejected": -162.02694702148438, | |
| "loss": 7.6341, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": 0.43572482466697693, | |
| "rewards/margins": 0.21323783695697784, | |
| "rewards/rejected": 0.22248701751232147, | |
| "rewards/safe_rewards": 0.5539884567260742, | |
| "rewards/unsafe_rewards": 0.3174612522125244, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0422559684675494e-07, | |
| "logits/chosen": -2.4309935569763184, | |
| "logits/rejected": -2.1530261039733887, | |
| "logps/chosen": -217.1282958984375, | |
| "logps/rejected": -168.8966522216797, | |
| "loss": 12.2909, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.23469574749469757, | |
| "rewards/margins": -0.31583258509635925, | |
| "rewards/rejected": 0.08113676309585571, | |
| "rewards/safe_rewards": -0.3013322949409485, | |
| "rewards/unsafe_rewards": -0.16805927455425262, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9961600110577457e-07, | |
| "logits/chosen": -2.349834680557251, | |
| "logits/rejected": -2.1397252082824707, | |
| "logps/chosen": -207.71615600585938, | |
| "logps/rejected": -192.11148071289062, | |
| "loss": 83.4484, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.3409779667854309, | |
| "rewards/margins": -0.09031665325164795, | |
| "rewards/rejected": -0.25066131353378296, | |
| "rewards/safe_rewards": -0.046450722962617874, | |
| "rewards/unsafe_rewards": -0.6355050802230835, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.950241925429867e-07, | |
| "logits/chosen": -2.4354217052459717, | |
| "logits/rejected": -2.2282073497772217, | |
| "logps/chosen": -202.4095458984375, | |
| "logps/rejected": -172.94119262695312, | |
| "loss": 10.2059, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.13358193635940552, | |
| "rewards/margins": 0.09321316331624985, | |
| "rewards/rejected": -0.22679507732391357, | |
| "rewards/safe_rewards": -0.3772156536579132, | |
| "rewards/unsafe_rewards": 0.110051728785038, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9045179221505495e-07, | |
| "logits/chosen": -2.385145664215088, | |
| "logits/rejected": -2.1816518306732178, | |
| "logps/chosen": -222.2650909423828, | |
| "logps/rejected": -183.89297485351562, | |
| "loss": 70.6764, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.3544660210609436, | |
| "rewards/margins": -0.31712788343429565, | |
| "rewards/rejected": -0.037338145077228546, | |
| "rewards/safe_rewards": -0.14638884365558624, | |
| "rewards/unsafe_rewards": -0.5625432729721069, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.8590041432690893e-07, | |
| "logits/chosen": -2.3393194675445557, | |
| "logits/rejected": -2.157670736312866, | |
| "logps/chosen": -191.87765502929688, | |
| "logps/rejected": -167.9620819091797, | |
| "loss": 15.8742, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": 0.08758888393640518, | |
| "rewards/margins": 0.09322256594896317, | |
| "rewards/rejected": -0.005633688066154718, | |
| "rewards/safe_rewards": 0.3342171907424927, | |
| "rewards/unsafe_rewards": -0.1590394526720047, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.813716656618788e-07, | |
| "logits/chosen": -2.371502637863159, | |
| "logits/rejected": -2.179802417755127, | |
| "logps/chosen": -185.43954467773438, | |
| "logps/rejected": -159.95692443847656, | |
| "loss": 31.7421, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": 0.07055462151765823, | |
| "rewards/margins": -0.016725819557905197, | |
| "rewards/rejected": 0.08728043735027313, | |
| "rewards/safe_rewards": 0.0999542772769928, | |
| "rewards/unsafe_rewards": 0.041154973208904266, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.7686714501444788e-07, | |
| "logits/chosen": -2.408245086669922, | |
| "logits/rejected": -2.111708402633667, | |
| "logps/chosen": -220.0321807861328, | |
| "logps/rejected": -177.4727783203125, | |
| "loss": 30.3254, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.08441750705242157, | |
| "rewards/margins": -0.06825534999370575, | |
| "rewards/rejected": 0.15267284214496613, | |
| "rewards/safe_rewards": -0.1991117298603058, | |
| "rewards/unsafe_rewards": 0.36794668436050415, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7238844262582768e-07, | |
| "logits/chosen": -2.3922970294952393, | |
| "logits/rejected": -2.2358450889587402, | |
| "logps/chosen": -214.66928100585938, | |
| "logps/rejected": -185.44973754882812, | |
| "loss": 25.5158, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.1254725158214569, | |
| "rewards/margins": -0.16257312893867493, | |
| "rewards/rejected": 0.28804564476013184, | |
| "rewards/safe_rewards": -0.2854710817337036, | |
| "rewards/unsafe_rewards": 0.5364161133766174, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.679371396225504e-07, | |
| "logits/chosen": -2.381708860397339, | |
| "logits/rejected": -2.1555020809173584, | |
| "logps/chosen": -204.30628967285156, | |
| "logps/rejected": -180.3300323486328, | |
| "loss": 22.5219, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": 0.1503904014825821, | |
| "rewards/margins": -0.30795037746429443, | |
| "rewards/rejected": 0.45834073424339294, | |
| "rewards/safe_rewards": 0.4240906834602356, | |
| "rewards/unsafe_rewards": -0.12330994755029678, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.6351480745828096e-07, | |
| "logits/chosen": -2.4050099849700928, | |
| "logits/rejected": -2.1825802326202393, | |
| "logps/chosen": -198.45777893066406, | |
| "logps/rejected": -172.17959594726562, | |
| "loss": 37.0212, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": 0.6139317750930786, | |
| "rewards/margins": 0.5727913975715637, | |
| "rewards/rejected": 0.04114028066396713, | |
| "rewards/safe_rewards": 0.8607079386711121, | |
| "rewards/unsafe_rewards": 0.3671554923057556, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.5912300735904248e-07, | |
| "logits/chosen": -2.4449119567871094, | |
| "logits/rejected": -2.174882173538208, | |
| "logps/chosen": -223.2691192626953, | |
| "logps/rejected": -173.9079132080078, | |
| "loss": 21.6142, | |
| "rewards/accuracies": 0.503125011920929, | |
| "rewards/chosen": 0.29134517908096313, | |
| "rewards/margins": 0.21831652522087097, | |
| "rewards/rejected": 0.07302861660718918, | |
| "rewards/safe_rewards": 0.24711818993091583, | |
| "rewards/unsafe_rewards": 0.33557215332984924, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.5476328977205395e-07, | |
| "logits/chosen": -2.383089542388916, | |
| "logits/rejected": -2.1814026832580566, | |
| "logps/chosen": -195.18643188476562, | |
| "logps/rejected": -165.39920043945312, | |
| "loss": 279.4912, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -0.45090776681900024, | |
| "rewards/margins": -0.30230337381362915, | |
| "rewards/rejected": -0.14860430359840393, | |
| "rewards/safe_rewards": 0.5930166840553284, | |
| "rewards/unsafe_rewards": -1.4948322772979736, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.5043719381837112e-07, | |
| "logits/chosen": -2.4133849143981934, | |
| "logits/rejected": -2.2195193767547607, | |
| "logps/chosen": -219.2970428466797, | |
| "logps/rejected": -189.27816772460938, | |
| "loss": 29.5997, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": 0.13977238535881042, | |
| "rewards/margins": 0.13596734404563904, | |
| "rewards/rejected": 0.0038050352595746517, | |
| "rewards/safe_rewards": 0.10270917415618896, | |
| "rewards/unsafe_rewards": 0.17683559656143188, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.461462467495284e-07, | |
| "logits/chosen": -2.3971149921417236, | |
| "logits/rejected": -2.2017760276794434, | |
| "logps/chosen": -195.2748260498047, | |
| "logps/rejected": -168.03221130371094, | |
| "loss": 29.824, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": 0.27122873067855835, | |
| "rewards/margins": 0.40271610021591187, | |
| "rewards/rejected": -0.13148736953735352, | |
| "rewards/safe_rewards": 0.28425708413124084, | |
| "rewards/unsafe_rewards": 0.25820040702819824, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.4189196340836865e-07, | |
| "logits/chosen": -2.4611334800720215, | |
| "logits/rejected": -2.2188827991485596, | |
| "logps/chosen": -199.0708465576172, | |
| "logps/rejected": -166.50717163085938, | |
| "loss": 42.7807, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": 0.1606270670890808, | |
| "rewards/margins": 0.13977651298046112, | |
| "rewards/rejected": 0.020850548520684242, | |
| "rewards/safe_rewards": 0.23887856304645538, | |
| "rewards/unsafe_rewards": 0.08237558603286743, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.3767584569425561e-07, | |
| "logits/chosen": -2.5276553630828857, | |
| "logits/rejected": -2.2806928157806396, | |
| "logps/chosen": -214.76614379882812, | |
| "logps/rejected": -178.0789031982422, | |
| "loss": 7.7411, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": 0.06127176806330681, | |
| "rewards/margins": 0.017500977963209152, | |
| "rewards/rejected": 0.043770790100097656, | |
| "rewards/safe_rewards": 0.12450921535491943, | |
| "rewards/unsafe_rewards": -0.0019656748045235872, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.334993820328541e-07, | |
| "logits/chosen": -2.461317539215088, | |
| "logits/rejected": -2.2503418922424316, | |
| "logps/chosen": -204.41952514648438, | |
| "logps/rejected": -171.56008911132812, | |
| "loss": 88.8508, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": -0.142044335603714, | |
| "rewards/margins": -0.2231227457523346, | |
| "rewards/rejected": 0.0810784175992012, | |
| "rewards/safe_rewards": -0.2867421507835388, | |
| "rewards/unsafe_rewards": 0.002653457224369049, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.2936404685066852e-07, | |
| "logits/chosen": -2.3843283653259277, | |
| "logits/rejected": -2.1979799270629883, | |
| "logps/chosen": -205.46273803710938, | |
| "logps/rejected": -180.91793823242188, | |
| "loss": 66.3165, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.1233854666352272, | |
| "rewards/margins": -0.03703648969531059, | |
| "rewards/rejected": -0.0863489955663681, | |
| "rewards/safe_rewards": -0.44312816858291626, | |
| "rewards/unsafe_rewards": 0.19635725021362305, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.252713000545221e-07, | |
| "logits/chosen": -2.455895185470581, | |
| "logits/rejected": -2.2126731872558594, | |
| "logps/chosen": -211.90866088867188, | |
| "logps/rejected": -172.7696533203125, | |
| "loss": 8.9746, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": 0.057679928839206696, | |
| "rewards/margins": -0.1852763295173645, | |
| "rewards/rejected": 0.2429562509059906, | |
| "rewards/safe_rewards": 0.14237050712108612, | |
| "rewards/unsafe_rewards": -0.027010658755898476, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.2122258651616304e-07, | |
| "logits/chosen": -2.445269823074341, | |
| "logits/rejected": -2.224661350250244, | |
| "logps/chosen": -209.90713500976562, | |
| "logps/rejected": -173.6033935546875, | |
| "loss": 63.3258, | |
| "rewards/accuracies": 0.5093749761581421, | |
| "rewards/chosen": -0.0619967095553875, | |
| "rewards/margins": -0.29980406165122986, | |
| "rewards/rejected": 0.23780739307403564, | |
| "rewards/safe_rewards": -0.2617853283882141, | |
| "rewards/unsafe_rewards": 0.13779191672801971, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.1721933556217792e-07, | |
| "logits/chosen": -2.4175376892089844, | |
| "logits/rejected": -2.23214054107666, | |
| "logps/chosen": -195.77786254882812, | |
| "logps/rejected": -175.40225219726562, | |
| "loss": 11.5399, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": 0.27427220344543457, | |
| "rewards/margins": 0.10898621380329132, | |
| "rewards/rejected": 0.16528600454330444, | |
| "rewards/safe_rewards": 0.21214981377124786, | |
| "rewards/unsafe_rewards": 0.3363945782184601, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.1326296046939333e-07, | |
| "logits/chosen": -2.3801956176757812, | |
| "logits/rejected": -2.162496328353882, | |
| "logps/chosen": -184.91856384277344, | |
| "logps/rejected": -153.4582061767578, | |
| "loss": 63.4268, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.12642014026641846, | |
| "rewards/margins": -0.3640880286693573, | |
| "rewards/rejected": 0.23766788840293884, | |
| "rewards/safe_rewards": -0.23899023234844208, | |
| "rewards/unsafe_rewards": -0.013849982991814613, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.0935485796594351e-07, | |
| "logits/chosen": -2.4861056804656982, | |
| "logits/rejected": -2.239741086959839, | |
| "logps/chosen": -222.3768310546875, | |
| "logps/rejected": -176.0164337158203, | |
| "loss": 21.3914, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.18739810585975647, | |
| "rewards/margins": 0.09455545991659164, | |
| "rewards/rejected": 0.09284263849258423, | |
| "rewards/safe_rewards": 0.19133667647838593, | |
| "rewards/unsafe_rewards": 0.1834595501422882, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.0549640773818028e-07, | |
| "logits/chosen": -2.4289638996124268, | |
| "logits/rejected": -2.237046003341675, | |
| "logps/chosen": -204.95181274414062, | |
| "logps/rejected": -158.824951171875, | |
| "loss": 8.4938, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": 0.01927146315574646, | |
| "rewards/margins": -0.0841434970498085, | |
| "rewards/rejected": 0.10341496765613556, | |
| "rewards/safe_rewards": 0.04857074096798897, | |
| "rewards/unsafe_rewards": -0.010027825832366943, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.0168897194359921e-07, | |
| "logits/chosen": -2.4466030597686768, | |
| "logits/rejected": -2.194831132888794, | |
| "logps/chosen": -222.03775024414062, | |
| "logps/rejected": -183.56564331054688, | |
| "loss": 82.0212, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.2902601659297943, | |
| "rewards/margins": 0.6036115884780884, | |
| "rewards/rejected": -0.3133513927459717, | |
| "rewards/safe_rewards": 0.4282899498939514, | |
| "rewards/unsafe_rewards": 0.152230367064476, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 9.793389472995392e-08, | |
| "logits/chosen": -2.4077987670898438, | |
| "logits/rejected": -2.1739821434020996, | |
| "logps/chosen": -209.699951171875, | |
| "logps/rejected": -166.60293579101562, | |
| "loss": 11.3477, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.014321346767246723, | |
| "rewards/margins": -0.030286794528365135, | |
| "rewards/rejected": 0.015965450555086136, | |
| "rewards/safe_rewards": 0.03987512364983559, | |
| "rewards/unsafe_rewards": -0.06851781159639359, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 9.423250176072874e-08, | |
| "logits/chosen": -2.401275634765625, | |
| "logits/rejected": -2.192737340927124, | |
| "logps/chosen": -181.48147583007812, | |
| "logps/rejected": -154.23431396484375, | |
| "loss": 15.9486, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": -0.0320340096950531, | |
| "rewards/margins": 0.20615582168102264, | |
| "rewards/rejected": -0.23818981647491455, | |
| "rewards/safe_rewards": 0.015104318037629128, | |
| "rewards/unsafe_rewards": -0.07917235046625137, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 9.058609974713654e-08, | |
| "logits/chosen": -2.4539401531219482, | |
| "logits/rejected": -2.1792826652526855, | |
| "logps/chosen": -206.2873992919922, | |
| "logps/rejected": -171.813232421875, | |
| "loss": 27.4047, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.08179975301027298, | |
| "rewards/margins": -0.07024437934160233, | |
| "rewards/rejected": -0.011555373668670654, | |
| "rewards/safe_rewards": 0.006575888488441706, | |
| "rewards/unsafe_rewards": -0.17017540335655212, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 8.699597598680753e-08, | |
| "logits/chosen": -2.3884987831115723, | |
| "logits/rejected": -2.1706833839416504, | |
| "logps/chosen": -183.61544799804688, | |
| "logps/rejected": -168.7871856689453, | |
| "loss": 34.4575, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": 0.44646185636520386, | |
| "rewards/margins": 0.1550489217042923, | |
| "rewards/rejected": 0.29141297936439514, | |
| "rewards/safe_rewards": 0.32389289140701294, | |
| "rewards/unsafe_rewards": 0.5690308809280396, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 8.346339790933166e-08, | |
| "logits/chosen": -2.4721839427948, | |
| "logits/rejected": -2.2297019958496094, | |
| "logps/chosen": -200.0784149169922, | |
| "logps/rejected": -159.7423858642578, | |
| "loss": 6.7397, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": 0.12982772290706635, | |
| "rewards/margins": -0.028519075363874435, | |
| "rewards/rejected": 0.15834678709506989, | |
| "rewards/safe_rewards": -0.03010488487780094, | |
| "rewards/unsafe_rewards": 0.2897603511810303, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.998961262881506e-08, | |
| "logits/chosen": -2.418222665786743, | |
| "logits/rejected": -2.1581873893737793, | |
| "logps/chosen": -220.6064453125, | |
| "logps/rejected": -172.82266235351562, | |
| "loss": 6.4288, | |
| "rewards/accuracies": 0.559374988079071, | |
| "rewards/chosen": 0.35179823637008667, | |
| "rewards/margins": 0.23103070259094238, | |
| "rewards/rejected": 0.12076754868030548, | |
| "rewards/safe_rewards": 0.3257552981376648, | |
| "rewards/unsafe_rewards": 0.37784117460250854, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.657584650360846e-08, | |
| "logits/chosen": -2.396697521209717, | |
| "logits/rejected": -2.2003862857818604, | |
| "logps/chosen": -199.44009399414062, | |
| "logps/rejected": -172.6617431640625, | |
| "loss": 35.7268, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.024801719933748245, | |
| "rewards/margins": 0.26587414741516113, | |
| "rewards/rejected": -0.2906758785247803, | |
| "rewards/safe_rewards": 0.05263688042759895, | |
| "rewards/unsafe_rewards": -0.10224030166864395, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.322330470336313e-08, | |
| "logits/chosen": -2.3913733959198, | |
| "logits/rejected": -2.189946413040161, | |
| "logps/chosen": -190.08120727539062, | |
| "logps/rejected": -170.0216522216797, | |
| "loss": 9.582, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": -0.11784199625253677, | |
| "rewards/margins": -0.031096214428544044, | |
| "rewards/rejected": -0.08674577623605728, | |
| "rewards/safe_rewards": -0.293480783700943, | |
| "rewards/unsafe_rewards": 0.05779681354761124, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.993317078356709e-08, | |
| "logits/chosen": -2.3910608291625977, | |
| "logits/rejected": -2.2192938327789307, | |
| "logps/chosen": -199.07406616210938, | |
| "logps/rejected": -170.1977996826172, | |
| "loss": 45.9652, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.10676655918359756, | |
| "rewards/margins": -0.08676379173994064, | |
| "rewards/rejected": -0.02000277489423752, | |
| "rewards/safe_rewards": -0.0720798522233963, | |
| "rewards/unsafe_rewards": -0.14145328104496002, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.67066062677118e-08, | |
| "logits/chosen": -2.4357597827911377, | |
| "logits/rejected": -2.2244791984558105, | |
| "logps/chosen": -208.4618682861328, | |
| "logps/rejected": -167.52764892578125, | |
| "loss": 20.8808, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": 0.2480795830488205, | |
| "rewards/margins": -0.08884197473526001, | |
| "rewards/rejected": 0.3369216322898865, | |
| "rewards/safe_rewards": 0.1537085473537445, | |
| "rewards/unsafe_rewards": 0.34245067834854126, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.354475023723685e-08, | |
| "logits/chosen": -2.3960747718811035, | |
| "logits/rejected": -2.1642906665802, | |
| "logps/chosen": -216.65756225585938, | |
| "logps/rejected": -171.6775665283203, | |
| "loss": 59.1855, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": 0.5152679681777954, | |
| "rewards/margins": 0.2937913239002228, | |
| "rewards/rejected": 0.22147664427757263, | |
| "rewards/safe_rewards": 0.2597041726112366, | |
| "rewards/unsafe_rewards": 0.7708317041397095, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.044871892939746e-08, | |
| "logits/chosen": -2.4158756732940674, | |
| "logits/rejected": -2.2148139476776123, | |
| "logps/chosen": -225.4951171875, | |
| "logps/rejected": -189.0193328857422, | |
| "loss": 30.3887, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": 0.14366625249385834, | |
| "rewards/margins": 0.25630486011505127, | |
| "rewards/rejected": -0.11263859272003174, | |
| "rewards/safe_rewards": 0.08603324741125107, | |
| "rewards/unsafe_rewards": 0.20129923522472382, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.741960534319676e-08, | |
| "logits/chosen": -2.391890525817871, | |
| "logits/rejected": -2.2089953422546387, | |
| "logps/chosen": -190.7472686767578, | |
| "logps/rejected": -160.5789031982422, | |
| "loss": 29.5828, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.04550771787762642, | |
| "rewards/margins": -0.21935884654521942, | |
| "rewards/rejected": 0.1738511323928833, | |
| "rewards/safe_rewards": -0.12179826200008392, | |
| "rewards/unsafe_rewards": 0.030782824382185936, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.44584788535217e-08, | |
| "logits/chosen": -2.4144439697265625, | |
| "logits/rejected": -2.209897994995117, | |
| "logps/chosen": -213.13632202148438, | |
| "logps/rejected": -176.97024536132812, | |
| "loss": 15.9924, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -0.15160061419010162, | |
| "rewards/margins": -0.01647660695016384, | |
| "rewards/rejected": -0.13512399792671204, | |
| "rewards/safe_rewards": -0.37716132402420044, | |
| "rewards/unsafe_rewards": 0.07396010309457779, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_logits/chosen": -2.084881544113159, | |
| "eval_logits/rejected": -1.833509922027588, | |
| "eval_logps/chosen": -131.02365112304688, | |
| "eval_logps/rejected": -92.45955657958984, | |
| "eval_loss": 0.6823093295097351, | |
| "eval_rewards/accuracies": 0.4713214039802551, | |
| "eval_rewards/chosen": -0.1577797532081604, | |
| "eval_rewards/margins": -0.05011267587542534, | |
| "eval_rewards/rejected": -0.10766706615686417, | |
| "eval_rewards/safe_rewards": -0.15565218031406403, | |
| "eval_rewards/unsafe_rewards": -0.15351000428199768, | |
| "eval_runtime": 1880.4558, | |
| "eval_samples_per_second": 17.572, | |
| "eval_steps_per_second": 1.099, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.156638483361933e-08, | |
| "logits/chosen": -2.435300827026367, | |
| "logits/rejected": -2.1943700313568115, | |
| "logps/chosen": -206.97384643554688, | |
| "logps/rejected": -174.73373413085938, | |
| "loss": 6.0946, | |
| "rewards/accuracies": 0.4781250059604645, | |
| "rewards/chosen": -0.2385288029909134, | |
| "rewards/margins": 0.05665416270494461, | |
| "rewards/rejected": -0.295183002948761, | |
| "rewards/safe_rewards": -0.1594325453042984, | |
| "rewards/unsafe_rewards": -0.3176250755786896, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.8744344286046236e-08, | |
| "logits/chosen": -2.4003233909606934, | |
| "logits/rejected": -2.177899122238159, | |
| "logps/chosen": -207.0956573486328, | |
| "logps/rejected": -169.01504516601562, | |
| "loss": 45.2569, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": 0.26545172929763794, | |
| "rewards/margins": 0.4598563611507416, | |
| "rewards/rejected": -0.19440460205078125, | |
| "rewards/safe_rewards": 0.313471257686615, | |
| "rewards/unsafe_rewards": 0.21743226051330566, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.599335348222169e-08, | |
| "logits/chosen": -2.4335553646087646, | |
| "logits/rejected": -2.246596574783325, | |
| "logps/chosen": -207.1642608642578, | |
| "logps/rejected": -186.18722534179688, | |
| "loss": 5.5153, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": -0.0844786986708641, | |
| "rewards/margins": 0.10317480564117432, | |
| "rewards/rejected": -0.1876535415649414, | |
| "rewards/safe_rewards": -0.22987417876720428, | |
| "rewards/unsafe_rewards": 0.06091681867837906, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.331438361071163e-08, | |
| "logits/chosen": -2.3511147499084473, | |
| "logits/rejected": -2.206602096557617, | |
| "logps/chosen": -212.7078857421875, | |
| "logps/rejected": -194.2686767578125, | |
| "loss": 21.5544, | |
| "rewards/accuracies": 0.47187501192092896, | |
| "rewards/chosen": -0.17899686098098755, | |
| "rewards/margins": 0.10844133794307709, | |
| "rewards/rejected": -0.28743821382522583, | |
| "rewards/safe_rewards": -0.30973827838897705, | |
| "rewards/unsafe_rewards": -0.04825545474886894, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.0708380434367864e-08, | |
| "logits/chosen": -2.4302127361297607, | |
| "logits/rejected": -2.1903905868530273, | |
| "logps/chosen": -199.45376586914062, | |
| "logps/rejected": -171.4712677001953, | |
| "loss": 11.7548, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.16563379764556885, | |
| "rewards/margins": -0.16774305701255798, | |
| "rewards/rejected": 0.0021092891693115234, | |
| "rewards/safe_rewards": -0.17964015901088715, | |
| "rewards/unsafe_rewards": -0.15162742137908936, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.817626395644305e-08, | |
| "logits/chosen": -2.428711414337158, | |
| "logits/rejected": -2.232553005218506, | |
| "logps/chosen": -206.1396942138672, | |
| "logps/rejected": -177.48374938964844, | |
| "loss": 20.469, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.2369929999113083, | |
| "rewards/margins": -0.06113150715827942, | |
| "rewards/rejected": -0.17586149275302887, | |
| "rewards/safe_rewards": -0.15375518798828125, | |
| "rewards/unsafe_rewards": -0.32023078203201294, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.571892809580013e-08, | |
| "logits/chosen": -2.395301580429077, | |
| "logits/rejected": -2.1873881816864014, | |
| "logps/chosen": -195.25765991210938, | |
| "logps/rejected": -175.76754760742188, | |
| "loss": 47.6931, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.030344385653734207, | |
| "rewards/margins": 0.31907138228416443, | |
| "rewards/rejected": -0.34941577911376953, | |
| "rewards/safe_rewards": -0.09232734888792038, | |
| "rewards/unsafe_rewards": 0.03163857385516167, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.333724037132976e-08, | |
| "logits/chosen": -2.4109716415405273, | |
| "logits/rejected": -2.1891541481018066, | |
| "logps/chosen": -198.6385040283203, | |
| "logps/rejected": -170.99563598632812, | |
| "loss": 5.2251, | |
| "rewards/accuracies": 0.4281249940395355, | |
| "rewards/chosen": -0.1846873015165329, | |
| "rewards/margins": -0.06299707293510437, | |
| "rewards/rejected": -0.12169022858142853, | |
| "rewards/safe_rewards": -0.1805131733417511, | |
| "rewards/unsafe_rewards": -0.18886145949363708, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.1032041595688506e-08, | |
| "logits/chosen": -2.3785929679870605, | |
| "logits/rejected": -2.171466827392578, | |
| "logps/chosen": -216.2442626953125, | |
| "logps/rejected": -185.38406372070312, | |
| "loss": 21.8766, | |
| "rewards/accuracies": 0.4312500059604645, | |
| "rewards/chosen": -0.09203994274139404, | |
| "rewards/margins": -0.34605592489242554, | |
| "rewards/rejected": 0.2540159523487091, | |
| "rewards/safe_rewards": -0.014932965859770775, | |
| "rewards/unsafe_rewards": -0.16914694011211395, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 2.880414557846453e-08, | |
| "logits/chosen": -2.4211525917053223, | |
| "logits/rejected": -2.259765863418579, | |
| "logps/chosen": -200.02296447753906, | |
| "logps/rejected": -164.5922393798828, | |
| "loss": 78.4789, | |
| "rewards/accuracies": 0.503125011920929, | |
| "rewards/chosen": -0.19741728901863098, | |
| "rewards/margins": 0.0014421313535422087, | |
| "rewards/rejected": -0.19885942339897156, | |
| "rewards/safe_rewards": -0.07222781330347061, | |
| "rewards/unsafe_rewards": -0.32260677218437195, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.6654338838876662e-08, | |
| "logits/chosen": -2.4327399730682373, | |
| "logits/rejected": -2.1489098072052, | |
| "logps/chosen": -206.57406616210938, | |
| "logps/rejected": -162.18191528320312, | |
| "loss": 31.8357, | |
| "rewards/accuracies": 0.5218750238418579, | |
| "rewards/chosen": -0.03170815855264664, | |
| "rewards/margins": 0.1048579216003418, | |
| "rewards/rejected": -0.13656608760356903, | |
| "rewards/safe_rewards": -0.28655681014060974, | |
| "rewards/unsafe_rewards": 0.22314047813415527, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.4583380328107805e-08, | |
| "logits/chosen": -2.4065799713134766, | |
| "logits/rejected": -2.168668508529663, | |
| "logps/chosen": -219.0827178955078, | |
| "logps/rejected": -174.2585906982422, | |
| "loss": 19.5756, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.28594768047332764, | |
| "rewards/margins": -0.2515542805194855, | |
| "rewards/rejected": -0.034393392503261566, | |
| "rewards/safe_rewards": -0.40954461693763733, | |
| "rewards/unsafe_rewards": -0.16235077381134033, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.259200116137039e-08, | |
| "logits/chosen": -2.381093740463257, | |
| "logits/rejected": -2.1939659118652344, | |
| "logps/chosen": -204.22921752929688, | |
| "logps/rejected": -187.06576538085938, | |
| "loss": 169.2986, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": 0.762048602104187, | |
| "rewards/margins": 0.7863933444023132, | |
| "rewards/rejected": -0.024344712495803833, | |
| "rewards/safe_rewards": 0.9233208894729614, | |
| "rewards/unsafe_rewards": 0.6007765531539917, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.068090435979958e-08, | |
| "logits/chosen": -2.3571343421936035, | |
| "logits/rejected": -2.1805238723754883, | |
| "logps/chosen": -194.33248901367188, | |
| "logps/rejected": -165.1163787841797, | |
| "loss": 56.9496, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.3039317727088928, | |
| "rewards/margins": -0.40204334259033203, | |
| "rewards/rejected": 0.09811154752969742, | |
| "rewards/safe_rewards": 0.22571036219596863, | |
| "rewards/unsafe_rewards": -0.8335739374160767, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.8850764602263423e-08, | |
| "logits/chosen": -2.415358304977417, | |
| "logits/rejected": -2.1450016498565674, | |
| "logps/chosen": -200.4285888671875, | |
| "logps/rejected": -173.5339813232422, | |
| "loss": 27.6237, | |
| "rewards/accuracies": 0.5531250238418579, | |
| "rewards/chosen": -0.12354181706905365, | |
| "rewards/margins": -0.11882360279560089, | |
| "rewards/rejected": -0.0047182366251945496, | |
| "rewards/safe_rewards": -0.05137089639902115, | |
| "rewards/unsafe_rewards": -0.19571277499198914, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.710222798718028e-08, | |
| "logits/chosen": -2.4396722316741943, | |
| "logits/rejected": -2.2350778579711914, | |
| "logps/chosen": -203.3378448486328, | |
| "logps/rejected": -178.9970245361328, | |
| "loss": 22.9552, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": 0.003960543777793646, | |
| "rewards/margins": 0.2720267176628113, | |
| "rewards/rejected": -0.2680661380290985, | |
| "rewards/safe_rewards": 0.06284536421298981, | |
| "rewards/unsafe_rewards": -0.054924286901950836, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.5435911804424356e-08, | |
| "logits/chosen": -2.4028568267822266, | |
| "logits/rejected": -2.2349255084991455, | |
| "logps/chosen": -229.4883270263672, | |
| "logps/rejected": -185.6358642578125, | |
| "loss": 31.5896, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.2171170711517334, | |
| "rewards/margins": 0.017191190272569656, | |
| "rewards/rejected": -0.23430824279785156, | |
| "rewards/safe_rewards": 0.15241694450378418, | |
| "rewards/unsafe_rewards": -0.586651086807251, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.3852404317403199e-08, | |
| "logits/chosen": -2.395153284072876, | |
| "logits/rejected": -2.2008633613586426, | |
| "logps/chosen": -220.5502471923828, | |
| "logps/rejected": -194.44186401367188, | |
| "loss": 26.1714, | |
| "rewards/accuracies": 0.5093749761581421, | |
| "rewards/chosen": -0.21780619025230408, | |
| "rewards/margins": -0.0533115491271019, | |
| "rewards/rejected": -0.16449466347694397, | |
| "rewards/safe_rewards": -0.4852879047393799, | |
| "rewards/unsafe_rewards": 0.04967564344406128, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.235226455538113e-08, | |
| "logits/chosen": -2.4504330158233643, | |
| "logits/rejected": -2.2494871616363525, | |
| "logps/chosen": -201.50564575195312, | |
| "logps/rejected": -167.95364379882812, | |
| "loss": 5.2467, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.037842657417058945, | |
| "rewards/margins": -0.06888096779584885, | |
| "rewards/rejected": 0.031038302928209305, | |
| "rewards/safe_rewards": -0.037054188549518585, | |
| "rewards/unsafe_rewards": -0.0386311374604702, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.0936022116124321e-08, | |
| "logits/chosen": -2.4290854930877686, | |
| "logits/rejected": -2.204906463623047, | |
| "logps/chosen": -199.54847717285156, | |
| "logps/rejected": -165.02816772460938, | |
| "loss": 41.987, | |
| "rewards/accuracies": 0.5093749761581421, | |
| "rewards/chosen": -0.15811631083488464, | |
| "rewards/margins": -0.3054047226905823, | |
| "rewards/rejected": 0.14728839695453644, | |
| "rewards/safe_rewards": -0.39381590485572815, | |
| "rewards/unsafe_rewards": 0.07758323848247528, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 9.60417697893534e-09, | |
| "logits/chosen": -2.4069314002990723, | |
| "logits/rejected": -2.2242488861083984, | |
| "logps/chosen": -199.82015991210938, | |
| "logps/rejected": -173.9343719482422, | |
| "loss": 22.6453, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": 0.1551128476858139, | |
| "rewards/margins": -0.032198842614889145, | |
| "rewards/rejected": 0.18731167912483215, | |
| "rewards/safe_rewards": 0.21557751297950745, | |
| "rewards/unsafe_rewards": 0.09464815258979797, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 8.357199328144576e-09, | |
| "logits/chosen": -2.4046077728271484, | |
| "logits/rejected": -2.2161166667938232, | |
| "logps/chosen": -216.55093383789062, | |
| "logps/rejected": -187.6187286376953, | |
| "loss": 56.4505, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": 0.08073421567678452, | |
| "rewards/margins": -0.025125902146100998, | |
| "rewards/rejected": 0.10586012899875641, | |
| "rewards/safe_rewards": 0.07104392349720001, | |
| "rewards/unsafe_rewards": 0.09042453020811081, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 7.1955293871198144e-09, | |
| "logits/chosen": -2.4008450508117676, | |
| "logits/rejected": -2.261340379714966, | |
| "logps/chosen": -187.19436645507812, | |
| "logps/rejected": -169.91722106933594, | |
| "loss": 18.4483, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.12119190394878387, | |
| "rewards/margins": -0.34029996395111084, | |
| "rewards/rejected": 0.21910807490348816, | |
| "rewards/safe_rewards": -0.20904748141765594, | |
| "rewards/unsafe_rewards": -0.03333630412817001, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 6.119577262853254e-09, | |
| "logits/chosen": -2.4227774143218994, | |
| "logits/rejected": -2.1880428791046143, | |
| "logps/chosen": -193.263671875, | |
| "logps/rejected": -162.72183227539062, | |
| "loss": 27.596, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.09984199702739716, | |
| "rewards/margins": -0.025053083896636963, | |
| "rewards/rejected": -0.07478892058134079, | |
| "rewards/safe_rewards": -0.26086345314979553, | |
| "rewards/unsafe_rewards": 0.06117943674325943, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 5.129722801180542e-09, | |
| "logits/chosen": -2.3443946838378906, | |
| "logits/rejected": -2.1799635887145996, | |
| "logps/chosen": -197.2679443359375, | |
| "logps/rejected": -180.6214599609375, | |
| "loss": 19.3736, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.3499890863895416, | |
| "rewards/margins": -0.13464350998401642, | |
| "rewards/rejected": -0.2153455764055252, | |
| "rewards/safe_rewards": -0.1743151694536209, | |
| "rewards/unsafe_rewards": -0.5256629586219788, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.226315452682816e-09, | |
| "logits/chosen": -2.413181781768799, | |
| "logits/rejected": -2.187439441680908, | |
| "logps/chosen": -196.54916381835938, | |
| "logps/rejected": -173.30929565429688, | |
| "loss": 31.0958, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.147268146276474, | |
| "rewards/margins": 0.10999511182308197, | |
| "rewards/rejected": -0.2572632431983948, | |
| "rewards/safe_rewards": -0.11902125179767609, | |
| "rewards/unsafe_rewards": -0.17551502585411072, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.4096741493194193e-09, | |
| "logits/chosen": -2.443580389022827, | |
| "logits/rejected": -2.2651684284210205, | |
| "logps/chosen": -199.41049194335938, | |
| "logps/rejected": -173.95718383789062, | |
| "loss": 9.8716, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.056076280772686005, | |
| "rewards/margins": 0.028754467144608498, | |
| "rewards/rejected": 0.027321819216012955, | |
| "rewards/safe_rewards": 0.034864675253629684, | |
| "rewards/unsafe_rewards": 0.07728789001703262, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.6800871918346846e-09, | |
| "logits/chosen": -2.4057886600494385, | |
| "logits/rejected": -2.155165672302246, | |
| "logps/chosen": -203.48025512695312, | |
| "logps/rejected": -172.94015502929688, | |
| "loss": 41.8074, | |
| "rewards/accuracies": 0.5218750238418579, | |
| "rewards/chosen": -0.21421018242835999, | |
| "rewards/margins": -0.43174242973327637, | |
| "rewards/rejected": 0.2175322324037552, | |
| "rewards/safe_rewards": -0.06811753660440445, | |
| "rewards/unsafe_rewards": -0.36030280590057373, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.0378121479783796e-09, | |
| "logits/chosen": -2.389869213104248, | |
| "logits/rejected": -2.1555044651031494, | |
| "logps/chosen": -196.02059936523438, | |
| "logps/rejected": -167.43655395507812, | |
| "loss": 61.0971, | |
| "rewards/accuracies": 0.4781250059604645, | |
| "rewards/chosen": 0.008689677342772484, | |
| "rewards/margins": 0.18058201670646667, | |
| "rewards/rejected": -0.17189235985279083, | |
| "rewards/safe_rewards": 0.042548321187496185, | |
| "rewards/unsafe_rewards": -0.025168979540467262, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.4830757615760247e-09, | |
| "logits/chosen": -2.4289557933807373, | |
| "logits/rejected": -2.1850333213806152, | |
| "logps/chosen": -207.24124145507812, | |
| "logps/rejected": -170.49305725097656, | |
| "loss": 144.1229, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.07967615127563477, | |
| "rewards/margins": 0.1154344230890274, | |
| "rewards/rejected": -0.19511058926582336, | |
| "rewards/safe_rewards": -0.10584060847759247, | |
| "rewards/unsafe_rewards": -0.05351167917251587, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.0160738724809548e-09, | |
| "logits/chosen": -2.4409990310668945, | |
| "logits/rejected": -2.207919120788574, | |
| "logps/chosen": -196.10601806640625, | |
| "logps/rejected": -171.36843872070312, | |
| "loss": 18.7773, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -0.08281825482845306, | |
| "rewards/margins": 0.23606376349925995, | |
| "rewards/rejected": -0.3188820481300354, | |
| "rewards/safe_rewards": 0.05616650730371475, | |
| "rewards/unsafe_rewards": -0.2218029946088791, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 6.369713474366212e-10, | |
| "logits/chosen": -2.420626640319824, | |
| "logits/rejected": -2.1977345943450928, | |
| "logps/chosen": -219.4222869873047, | |
| "logps/rejected": -181.95010375976562, | |
| "loss": 17.5266, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -0.16265834867954254, | |
| "rewards/margins": -0.11877261102199554, | |
| "rewards/rejected": -0.0438857302069664, | |
| "rewards/safe_rewards": -0.035406678915023804, | |
| "rewards/unsafe_rewards": -0.2899099886417389, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.459020218731512e-10, | |
| "logits/chosen": -2.4327456951141357, | |
| "logits/rejected": -2.220496654510498, | |
| "logps/chosen": -202.61898803710938, | |
| "logps/rejected": -167.197021484375, | |
| "loss": 43.7242, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": 0.15349379181861877, | |
| "rewards/margins": 0.14935937523841858, | |
| "rewards/rejected": 0.004134447779506445, | |
| "rewards/safe_rewards": 0.08035097271203995, | |
| "rewards/unsafe_rewards": 0.2266366183757782, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.429686526593088e-10, | |
| "logits/chosen": -2.398090124130249, | |
| "logits/rejected": -2.192744255065918, | |
| "logps/chosen": -206.80520629882812, | |
| "logps/rejected": -175.9212646484375, | |
| "loss": 23.3409, | |
| "rewards/accuracies": 0.453125, | |
| "rewards/chosen": 0.3794136941432953, | |
| "rewards/margins": 0.2774004638195038, | |
| "rewards/rejected": 0.10201327502727509, | |
| "rewards/safe_rewards": 0.5806846022605896, | |
| "rewards/unsafe_rewards": 0.17814283072948456, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.824288182584622e-11, | |
| "logits/chosen": -2.4241063594818115, | |
| "logits/rejected": -2.2421114444732666, | |
| "logps/chosen": -206.7459716796875, | |
| "logps/rejected": -168.176513671875, | |
| "loss": 19.5817, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.24208331108093262, | |
| "rewards/margins": -0.22288334369659424, | |
| "rewards/rejected": -0.019199971109628677, | |
| "rewards/safe_rewards": 0.039939720183610916, | |
| "rewards/unsafe_rewards": -0.5241063237190247, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1858, | |
| "total_flos": 0.0, | |
| "train_loss": 67.04043597990268, | |
| "train_runtime": 46860.0347, | |
| "train_samples_per_second": 1.269, | |
| "train_steps_per_second": 0.04 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1858, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |