| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9997172745264349, |
| "eval_steps": 500, |
| "global_step": 442, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0022618037885213456, |
| "grad_norm": 141.01397939585976, |
| "learning_rate": 1.7777777777777777e-08, |
| "logits/chosen": -1.0503966808319092, |
| "logits/rejected": -1.0386303663253784, |
| "logps/chosen": -1.497732400894165, |
| "logps/rejected": -1.611051321029663, |
| "loss": 5.5018, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -14.977323532104492, |
| "rewards/margins": 1.1331881284713745, |
| "rewards/rejected": -16.110509872436523, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004523607577042691, |
| "grad_norm": 68.88925767431851, |
| "learning_rate": 3.5555555555555554e-08, |
| "logits/chosen": -1.104045033454895, |
| "logits/rejected": -1.1043524742126465, |
| "logps/chosen": -1.5607943534851074, |
| "logps/rejected": -1.5189738273620605, |
| "loss": 5.9744, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -15.60794448852539, |
| "rewards/margins": -0.4182056784629822, |
| "rewards/rejected": -15.189737319946289, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006785411365564037, |
| "grad_norm": 127.77084111180126, |
| "learning_rate": 5.333333333333333e-08, |
| "logits/chosen": -1.0770599842071533, |
| "logits/rejected": -1.0823699235916138, |
| "logps/chosen": -1.5453805923461914, |
| "logps/rejected": -1.722267746925354, |
| "loss": 6.0253, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -15.453805923461914, |
| "rewards/margins": 1.7688698768615723, |
| "rewards/rejected": -17.222675323486328, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.009047215154085382, |
| "grad_norm": 106.10699632777116, |
| "learning_rate": 7.111111111111111e-08, |
| "logits/chosen": -1.09734308719635, |
| "logits/rejected": -1.0833051204681396, |
| "logps/chosen": -1.545915126800537, |
| "logps/rejected": -1.5207103490829468, |
| "loss": 5.8916, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -15.459149360656738, |
| "rewards/margins": -0.2520461976528168, |
| "rewards/rejected": -15.207103729248047, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01130901894260673, |
| "grad_norm": 77.5519625218596, |
| "learning_rate": 8.888888888888888e-08, |
| "logits/chosen": -1.0867865085601807, |
| "logits/rejected": -1.0832228660583496, |
| "logps/chosen": -1.5075905323028564, |
| "logps/rejected": -1.5380187034606934, |
| "loss": 5.3655, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -15.075907707214355, |
| "rewards/margins": 0.30427923798561096, |
| "rewards/rejected": -15.380186080932617, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.013570822731128074, |
| "grad_norm": 78.11509558718444, |
| "learning_rate": 1.0666666666666666e-07, |
| "logits/chosen": -1.1473548412322998, |
| "logits/rejected": -1.1390419006347656, |
| "logps/chosen": -1.5627464056015015, |
| "logps/rejected": -1.5230021476745605, |
| "loss": 6.1162, |
| "rewards/accuracies": 0.4609375, |
| "rewards/chosen": -15.627466201782227, |
| "rewards/margins": -0.39744287729263306, |
| "rewards/rejected": -15.230021476745605, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01583262651964942, |
| "grad_norm": 39.73625297036525, |
| "learning_rate": 1.2444444444444443e-07, |
| "logits/chosen": -1.0994057655334473, |
| "logits/rejected": -1.0783416032791138, |
| "logps/chosen": -1.4192826747894287, |
| "logps/rejected": -1.718736171722412, |
| "loss": 4.1576, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -14.192827224731445, |
| "rewards/margins": 2.9945356845855713, |
| "rewards/rejected": -17.187362670898438, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.018094430308170765, |
| "grad_norm": 114.77062454343967, |
| "learning_rate": 1.4222222222222222e-07, |
| "logits/chosen": -1.0515432357788086, |
| "logits/rejected": -1.0535235404968262, |
| "logps/chosen": -1.4979735612869263, |
| "logps/rejected": -1.5330562591552734, |
| "loss": 5.8127, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -14.979734420776367, |
| "rewards/margins": 0.35082772374153137, |
| "rewards/rejected": -15.330562591552734, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.020356234096692113, |
| "grad_norm": 96.07872744538972, |
| "learning_rate": 1.6e-07, |
| "logits/chosen": -1.1019177436828613, |
| "logits/rejected": -1.0908172130584717, |
| "logps/chosen": -1.4399257898330688, |
| "logps/rejected": -1.5934813022613525, |
| "loss": 5.4423, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": -14.399259567260742, |
| "rewards/margins": 1.5355541706085205, |
| "rewards/rejected": -15.934813499450684, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02261803788521346, |
| "grad_norm": 146.26584785993413, |
| "learning_rate": 1.7777777777777776e-07, |
| "logits/chosen": -1.0987049341201782, |
| "logits/rejected": -1.1177351474761963, |
| "logps/chosen": -1.589949607849121, |
| "logps/rejected": -1.5490418672561646, |
| "loss": 5.7372, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -15.899496078491211, |
| "rewards/margins": -0.40907663106918335, |
| "rewards/rejected": -15.490419387817383, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.024879841673734804, |
| "grad_norm": 79.11204208374147, |
| "learning_rate": 1.9555555555555555e-07, |
| "logits/chosen": -1.1473506689071655, |
| "logits/rejected": -1.1570088863372803, |
| "logps/chosen": -1.5604548454284668, |
| "logps/rejected": -1.726046085357666, |
| "loss": 5.2868, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -15.6045503616333, |
| "rewards/margins": 1.6559122800827026, |
| "rewards/rejected": -17.260459899902344, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02714164546225615, |
| "grad_norm": 123.63916912843932, |
| "learning_rate": 2.133333333333333e-07, |
| "logits/chosen": -1.0692572593688965, |
| "logits/rejected": -1.0530564785003662, |
| "logps/chosen": -1.6708486080169678, |
| "logps/rejected": -1.7337902784347534, |
| "loss": 6.1169, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -16.708486557006836, |
| "rewards/margins": 0.6294152736663818, |
| "rewards/rejected": -17.337902069091797, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.029403449250777494, |
| "grad_norm": 139.28796689567102, |
| "learning_rate": 2.3111111111111107e-07, |
| "logits/chosen": -1.0673459768295288, |
| "logits/rejected": -1.053938865661621, |
| "logps/chosen": -1.6321990489959717, |
| "logps/rejected": -1.5502426624298096, |
| "loss": 6.0676, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -16.321990966796875, |
| "rewards/margins": -0.8195652365684509, |
| "rewards/rejected": -15.502425193786621, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03166525303929884, |
| "grad_norm": 44.86523130113871, |
| "learning_rate": 2.4888888888888886e-07, |
| "logits/chosen": -1.0727157592773438, |
| "logits/rejected": -1.0687270164489746, |
| "logps/chosen": -1.3144315481185913, |
| "logps/rejected": -1.5097585916519165, |
| "loss": 4.4316, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -13.144314765930176, |
| "rewards/margins": 1.9532725811004639, |
| "rewards/rejected": -15.097586631774902, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.033927056827820185, |
| "grad_norm": 119.40197659765802, |
| "learning_rate": 2.666666666666666e-07, |
| "logits/chosen": -1.072951078414917, |
| "logits/rejected": -1.0741289854049683, |
| "logps/chosen": -1.460188865661621, |
| "logps/rejected": -1.5374088287353516, |
| "loss": 5.1376, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -14.601886749267578, |
| "rewards/margins": 0.772199273109436, |
| "rewards/rejected": -15.374088287353516, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03618886061634153, |
| "grad_norm": 120.20221027013656, |
| "learning_rate": 2.8444444444444443e-07, |
| "logits/chosen": -1.1052724123001099, |
| "logits/rejected": -1.0987166166305542, |
| "logps/chosen": -1.544463872909546, |
| "logps/rejected": -1.5167737007141113, |
| "loss": 6.1293, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -15.444637298583984, |
| "rewards/margins": -0.27689969539642334, |
| "rewards/rejected": -15.16773796081543, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.038450664404862875, |
| "grad_norm": 65.69449437246068, |
| "learning_rate": 3.022222222222222e-07, |
| "logits/chosen": -1.1224839687347412, |
| "logits/rejected": -1.0999984741210938, |
| "logps/chosen": -1.3996049165725708, |
| "logps/rejected": -1.3617793321609497, |
| "loss": 6.0917, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -13.996048927307129, |
| "rewards/margins": -0.37825584411621094, |
| "rewards/rejected": -13.617794036865234, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04071246819338423, |
| "grad_norm": 99.76979777598639, |
| "learning_rate": 3.2e-07, |
| "logits/chosen": -1.1205981969833374, |
| "logits/rejected": -1.120253562927246, |
| "logps/chosen": -1.8338966369628906, |
| "logps/rejected": -1.8411056995391846, |
| "loss": 6.0657, |
| "rewards/accuracies": 0.4609375, |
| "rewards/chosen": -18.338966369628906, |
| "rewards/margins": 0.07208935916423798, |
| "rewards/rejected": -18.411056518554688, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04297427198190557, |
| "grad_norm": 75.13215709008873, |
| "learning_rate": 3.3777777777777777e-07, |
| "logits/chosen": -1.1356143951416016, |
| "logits/rejected": -1.1374859809875488, |
| "logps/chosen": -1.5520572662353516, |
| "logps/rejected": -1.584758996963501, |
| "loss": 5.3015, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -15.5205717086792, |
| "rewards/margins": 0.327017605304718, |
| "rewards/rejected": -15.847589492797852, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04523607577042692, |
| "grad_norm": 44.2748498763636, |
| "learning_rate": 3.5555555555555553e-07, |
| "logits/chosen": -1.149074912071228, |
| "logits/rejected": -1.12880539894104, |
| "logps/chosen": -1.4242889881134033, |
| "logps/rejected": -1.6002920866012573, |
| "loss": 4.4667, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -14.242890357971191, |
| "rewards/margins": 1.760029673576355, |
| "rewards/rejected": -16.00292205810547, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04749787955894826, |
| "grad_norm": 69.54836323607135, |
| "learning_rate": 3.7333333333333334e-07, |
| "logits/chosen": -1.0379455089569092, |
| "logits/rejected": -1.0415663719177246, |
| "logps/chosen": -1.413461446762085, |
| "logps/rejected": -1.4350874423980713, |
| "loss": 5.2848, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -14.134614944458008, |
| "rewards/margins": 0.21626026928424835, |
| "rewards/rejected": -14.350875854492188, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04975968334746961, |
| "grad_norm": 101.12732993520753, |
| "learning_rate": 3.911111111111111e-07, |
| "logits/chosen": -1.0873618125915527, |
| "logits/rejected": -1.0630055665969849, |
| "logps/chosen": -1.478364109992981, |
| "logps/rejected": -1.5262987613677979, |
| "loss": 4.7711, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -14.783641815185547, |
| "rewards/margins": 0.4793458580970764, |
| "rewards/rejected": -15.262988090515137, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05202148713599095, |
| "grad_norm": 97.19117838044485, |
| "learning_rate": 4.0888888888888886e-07, |
| "logits/chosen": -1.0850725173950195, |
| "logits/rejected": -1.0555074214935303, |
| "logps/chosen": -1.4664888381958008, |
| "logps/rejected": -1.4977301359176636, |
| "loss": 5.4823, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -14.664888381958008, |
| "rewards/margins": 0.3124130368232727, |
| "rewards/rejected": -14.977302551269531, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0542832909245123, |
| "grad_norm": 86.40698470532111, |
| "learning_rate": 4.266666666666666e-07, |
| "logits/chosen": -1.0878976583480835, |
| "logits/rejected": -1.083939552307129, |
| "logps/chosen": -1.315569281578064, |
| "logps/rejected": -1.3708666563034058, |
| "loss": 5.082, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -13.155693054199219, |
| "rewards/margins": 0.552973210811615, |
| "rewards/rejected": -13.70866584777832, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05654509471303364, |
| "grad_norm": 87.79572082532306, |
| "learning_rate": 4.4444444444444444e-07, |
| "logits/chosen": -1.0942628383636475, |
| "logits/rejected": -1.091521978378296, |
| "logps/chosen": -1.6227660179138184, |
| "logps/rejected": -1.6117480993270874, |
| "loss": 5.7014, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -16.2276611328125, |
| "rewards/margins": -0.1101788878440857, |
| "rewards/rejected": -16.11747932434082, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05880689850155499, |
| "grad_norm": 268.4862749626735, |
| "learning_rate": 4.6222222222222214e-07, |
| "logits/chosen": -1.1140916347503662, |
| "logits/rejected": -1.0925090312957764, |
| "logps/chosen": -1.5979957580566406, |
| "logps/rejected": -1.5717800855636597, |
| "loss": 5.6624, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -15.97995662689209, |
| "rewards/margins": -0.2621573209762573, |
| "rewards/rejected": -15.71780014038086, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.061068702290076333, |
| "grad_norm": 79.4491817324606, |
| "learning_rate": 4.8e-07, |
| "logits/chosen": -1.1191110610961914, |
| "logits/rejected": -1.1132099628448486, |
| "logps/chosen": -1.6132346391677856, |
| "logps/rejected": -1.5380040407180786, |
| "loss": 6.0429, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -16.13234519958496, |
| "rewards/margins": -0.7523058652877808, |
| "rewards/rejected": -15.380041122436523, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.06333050607859768, |
| "grad_norm": 66.73204327091015, |
| "learning_rate": 4.977777777777777e-07, |
| "logits/chosen": -1.1149988174438477, |
| "logits/rejected": -1.0995606184005737, |
| "logps/chosen": -1.4549816846847534, |
| "logps/rejected": -1.5254497528076172, |
| "loss": 5.5384, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -14.549816131591797, |
| "rewards/margins": 0.7046794891357422, |
| "rewards/rejected": -15.254497528076172, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06559230986711903, |
| "grad_norm": 70.83012837849557, |
| "learning_rate": 5.155555555555556e-07, |
| "logits/chosen": -1.1130647659301758, |
| "logits/rejected": -1.1251792907714844, |
| "logps/chosen": -1.5109716653823853, |
| "logps/rejected": -1.526107907295227, |
| "loss": 5.4054, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -15.10971736907959, |
| "rewards/margins": 0.1513628363609314, |
| "rewards/rejected": -15.261078834533691, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06785411365564037, |
| "grad_norm": 94.06032585695752, |
| "learning_rate": 5.333333333333332e-07, |
| "logits/chosen": -1.0764459371566772, |
| "logits/rejected": -1.078137993812561, |
| "logps/chosen": -1.5241881608963013, |
| "logps/rejected": -1.5383861064910889, |
| "loss": 5.9267, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -15.241884231567383, |
| "rewards/margins": 0.14197878539562225, |
| "rewards/rejected": -15.383862495422363, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07011591744416172, |
| "grad_norm": 75.08995684434343, |
| "learning_rate": 5.511111111111111e-07, |
| "logits/chosen": -1.126107096672058, |
| "logits/rejected": -1.1239315271377563, |
| "logps/chosen": -1.5170094966888428, |
| "logps/rejected": -1.4923768043518066, |
| "loss": 5.6393, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -15.170095443725586, |
| "rewards/margins": -0.24632781744003296, |
| "rewards/rejected": -14.92376708984375, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07237772123268306, |
| "grad_norm": 74.95824050979452, |
| "learning_rate": 5.688888888888889e-07, |
| "logits/chosen": -1.1436784267425537, |
| "logits/rejected": -1.1325445175170898, |
| "logps/chosen": -1.438338041305542, |
| "logps/rejected": -1.3806811571121216, |
| "loss": 5.9243, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -14.383380889892578, |
| "rewards/margins": -0.5765687227249146, |
| "rewards/rejected": -13.80681324005127, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07463952502120441, |
| "grad_norm": 57.484672836149315, |
| "learning_rate": 5.866666666666666e-07, |
| "logits/chosen": -1.0700812339782715, |
| "logits/rejected": -1.0605463981628418, |
| "logps/chosen": -1.412937879562378, |
| "logps/rejected": -1.5639235973358154, |
| "loss": 4.7378, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -14.129378318786621, |
| "rewards/margins": 1.5098581314086914, |
| "rewards/rejected": -15.639235496520996, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07690132880972575, |
| "grad_norm": 53.90732494351107, |
| "learning_rate": 6.044444444444444e-07, |
| "logits/chosen": -1.1256736516952515, |
| "logits/rejected": -1.084123134613037, |
| "logps/chosen": -1.3476636409759521, |
| "logps/rejected": -1.4755098819732666, |
| "loss": 4.5532, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -13.47663688659668, |
| "rewards/margins": 1.278462290763855, |
| "rewards/rejected": -14.75510025024414, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0791631325982471, |
| "grad_norm": 85.39623980816621, |
| "learning_rate": 6.222222222222223e-07, |
| "logits/chosen": -1.1434717178344727, |
| "logits/rejected": -1.1167579889297485, |
| "logps/chosen": -1.5166611671447754, |
| "logps/rejected": -1.6307806968688965, |
| "loss": 5.1443, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -15.166611671447754, |
| "rewards/margins": 1.1411969661712646, |
| "rewards/rejected": -16.30780792236328, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.08142493638676845, |
| "grad_norm": 69.28128094162892, |
| "learning_rate": 6.4e-07, |
| "logits/chosen": -1.0718586444854736, |
| "logits/rejected": -1.0694938898086548, |
| "logps/chosen": -1.4529365301132202, |
| "logps/rejected": -1.576625108718872, |
| "loss": 4.986, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -14.529365539550781, |
| "rewards/margins": 1.236886978149414, |
| "rewards/rejected": -15.766251564025879, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08368674017528979, |
| "grad_norm": 63.685546020810015, |
| "learning_rate": 6.577777777777777e-07, |
| "logits/chosen": -1.0874630212783813, |
| "logits/rejected": -1.0730936527252197, |
| "logps/chosen": -1.308869481086731, |
| "logps/rejected": -1.319458246231079, |
| "loss": 5.2187, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -13.088695526123047, |
| "rewards/margins": 0.1058862954378128, |
| "rewards/rejected": -13.194581031799316, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08594854396381114, |
| "grad_norm": 85.27763093015075, |
| "learning_rate": 6.755555555555555e-07, |
| "logits/chosen": -1.1657055616378784, |
| "logits/rejected": -1.1689563989639282, |
| "logps/chosen": -1.4712262153625488, |
| "logps/rejected": -1.4614089727401733, |
| "loss": 5.4122, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -14.712262153625488, |
| "rewards/margins": -0.0981736034154892, |
| "rewards/rejected": -14.614089965820312, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08821034775233248, |
| "grad_norm": 72.53486427096175, |
| "learning_rate": 6.933333333333333e-07, |
| "logits/chosen": -1.158952236175537, |
| "logits/rejected": -1.1623462438583374, |
| "logps/chosen": -1.4649841785430908, |
| "logps/rejected": -1.446244478225708, |
| "loss": 5.4153, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -14.649843215942383, |
| "rewards/margins": -0.18739792704582214, |
| "rewards/rejected": -14.462443351745605, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.09047215154085383, |
| "grad_norm": 48.05551639915771, |
| "learning_rate": 7.111111111111111e-07, |
| "logits/chosen": -1.1088396310806274, |
| "logits/rejected": -1.087456464767456, |
| "logps/chosen": -1.3251829147338867, |
| "logps/rejected": -1.482697606086731, |
| "loss": 4.2834, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -13.25182819366455, |
| "rewards/margins": 1.5751475095748901, |
| "rewards/rejected": -14.82697582244873, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09273395532937517, |
| "grad_norm": 41.08333243058533, |
| "learning_rate": 7.288888888888888e-07, |
| "logits/chosen": -1.1634094715118408, |
| "logits/rejected": -1.1473877429962158, |
| "logps/chosen": -1.2953405380249023, |
| "logps/rejected": -1.4355030059814453, |
| "loss": 4.393, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -12.95340633392334, |
| "rewards/margins": 1.401624321937561, |
| "rewards/rejected": -14.355029106140137, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.09499575911789652, |
| "grad_norm": 81.6745151173038, |
| "learning_rate": 7.466666666666667e-07, |
| "logits/chosen": -1.0549430847167969, |
| "logits/rejected": -1.0252429246902466, |
| "logps/chosen": -1.3976647853851318, |
| "logps/rejected": -1.4610525369644165, |
| "loss": 5.1624, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -13.97664737701416, |
| "rewards/margins": 0.6338790059089661, |
| "rewards/rejected": -14.610527038574219, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09725756290641786, |
| "grad_norm": 64.66643703071367, |
| "learning_rate": 7.644444444444444e-07, |
| "logits/chosen": -1.1331276893615723, |
| "logits/rejected": -1.1149864196777344, |
| "logps/chosen": -1.3842030763626099, |
| "logps/rejected": -1.4057769775390625, |
| "loss": 5.1358, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -13.84203052520752, |
| "rewards/margins": 0.21573936939239502, |
| "rewards/rejected": -14.057769775390625, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09951936669493922, |
| "grad_norm": 50.21628916517668, |
| "learning_rate": 7.822222222222222e-07, |
| "logits/chosen": -1.0726534128189087, |
| "logits/rejected": -1.0697026252746582, |
| "logps/chosen": -1.2373145818710327, |
| "logps/rejected": -1.3352696895599365, |
| "loss": 4.3962, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -12.373147010803223, |
| "rewards/margins": 0.9795514941215515, |
| "rewards/rejected": -13.35269832611084, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.10178117048346055, |
| "grad_norm": 43.75236482093407, |
| "learning_rate": 8e-07, |
| "logits/chosen": -1.1228159666061401, |
| "logits/rejected": -1.1156741380691528, |
| "logps/chosen": -1.2973405122756958, |
| "logps/rejected": -1.3789002895355225, |
| "loss": 4.6924, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -12.973404884338379, |
| "rewards/margins": 0.8155972957611084, |
| "rewards/rejected": -13.78900146484375, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1040429742719819, |
| "grad_norm": 93.9343495458176, |
| "learning_rate": 7.999874759018868e-07, |
| "logits/chosen": -1.165191650390625, |
| "logits/rejected": -1.1482605934143066, |
| "logps/chosen": -1.5229721069335938, |
| "logps/rejected": -1.6199113130569458, |
| "loss": 4.7701, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -15.229720115661621, |
| "rewards/margins": 0.9693921804428101, |
| "rewards/rejected": -16.199111938476562, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10630477806050326, |
| "grad_norm": 66.34152836720595, |
| "learning_rate": 7.999499043918123e-07, |
| "logits/chosen": -1.1630305051803589, |
| "logits/rejected": -1.1713589429855347, |
| "logps/chosen": -1.3499112129211426, |
| "logps/rejected": -1.3848986625671387, |
| "loss": 5.0497, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": -13.499112129211426, |
| "rewards/margins": 0.3498736023902893, |
| "rewards/rejected": -13.848986625671387, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1085665818490246, |
| "grad_norm": 73.12468967818121, |
| "learning_rate": 7.998872878225228e-07, |
| "logits/chosen": -1.104253888130188, |
| "logits/rejected": -1.0981318950653076, |
| "logps/chosen": -1.3970146179199219, |
| "logps/rejected": -1.4857858419418335, |
| "loss": 4.6942, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -13.970146179199219, |
| "rewards/margins": 0.8877115249633789, |
| "rewards/rejected": -14.857856750488281, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.11082838563754595, |
| "grad_norm": 50.68026321922499, |
| "learning_rate": 7.997996301150987e-07, |
| "logits/chosen": -1.095520257949829, |
| "logits/rejected": -1.0906875133514404, |
| "logps/chosen": -1.3076212406158447, |
| "logps/rejected": -1.3888590335845947, |
| "loss": 4.6915, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -13.076210021972656, |
| "rewards/margins": 0.8123778700828552, |
| "rewards/rejected": -13.888589859008789, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11309018942606729, |
| "grad_norm": 54.48253221284768, |
| "learning_rate": 7.996869367587088e-07, |
| "logits/chosen": -1.0804747343063354, |
| "logits/rejected": -1.0651739835739136, |
| "logps/chosen": -1.357546329498291, |
| "logps/rejected": -1.4353337287902832, |
| "loss": 4.7526, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -13.575462341308594, |
| "rewards/margins": 0.7778746485710144, |
| "rewards/rejected": -14.353337287902832, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11535199321458864, |
| "grad_norm": 38.59484455862516, |
| "learning_rate": 7.99549214810266e-07, |
| "logits/chosen": -1.088415503501892, |
| "logits/rejected": -1.088612675666809, |
| "logps/chosen": -1.3685206174850464, |
| "logps/rejected": -1.4190219640731812, |
| "loss": 4.8571, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -13.68520450592041, |
| "rewards/margins": 0.5050145387649536, |
| "rewards/rejected": -14.19021987915039, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.11761379700310998, |
| "grad_norm": 47.56303506310153, |
| "learning_rate": 7.993864728939867e-07, |
| "logits/chosen": -1.103301763534546, |
| "logits/rejected": -1.0828572511672974, |
| "logps/chosen": -1.3142025470733643, |
| "logps/rejected": -1.4140937328338623, |
| "loss": 4.8802, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -13.1420259475708, |
| "rewards/margins": 0.9989122748374939, |
| "rewards/rejected": -14.140937805175781, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11987560079163133, |
| "grad_norm": 53.48543744291103, |
| "learning_rate": 7.991987212008491e-07, |
| "logits/chosen": -1.1189554929733276, |
| "logits/rejected": -1.1071739196777344, |
| "logps/chosen": -1.3785709142684937, |
| "logps/rejected": -1.5353251695632935, |
| "loss": 4.5596, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -13.785710334777832, |
| "rewards/margins": 1.5675415992736816, |
| "rewards/rejected": -15.353252410888672, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.12213740458015267, |
| "grad_norm": 54.3409563468225, |
| "learning_rate": 7.989859714879565e-07, |
| "logits/chosen": -1.139965534210205, |
| "logits/rejected": -1.1204081773757935, |
| "logps/chosen": -1.3048521280288696, |
| "logps/rejected": -1.372660756111145, |
| "loss": 4.7955, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -13.048519134521484, |
| "rewards/margins": 0.6780871748924255, |
| "rewards/rejected": -13.726606369018555, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.12439920836867402, |
| "grad_norm": 44.10829510538185, |
| "learning_rate": 7.987482370778005e-07, |
| "logits/chosen": -1.1082535982131958, |
| "logits/rejected": -1.1038706302642822, |
| "logps/chosen": -1.3513308763504028, |
| "logps/rejected": -1.4602677822113037, |
| "loss": 4.725, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -13.513306617736816, |
| "rewards/margins": 1.0893704891204834, |
| "rewards/rejected": -14.602678298950195, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.12666101215719536, |
| "grad_norm": 77.53742820558867, |
| "learning_rate": 7.984855328574262e-07, |
| "logits/chosen": -1.007509708404541, |
| "logits/rejected": -1.009194254875183, |
| "logps/chosen": -1.3011356592178345, |
| "logps/rejected": -1.372280478477478, |
| "loss": 4.6891, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -13.011357307434082, |
| "rewards/margins": 0.7114498615264893, |
| "rewards/rejected": -13.722806930541992, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1289228159457167, |
| "grad_norm": 70.85645949378882, |
| "learning_rate": 7.981978752775009e-07, |
| "logits/chosen": -1.0481213331222534, |
| "logits/rejected": -1.0459202527999878, |
| "logps/chosen": -1.3659950494766235, |
| "logps/rejected": -1.4737251996994019, |
| "loss": 4.7291, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -13.659952163696289, |
| "rewards/margins": 1.0772995948791504, |
| "rewards/rejected": -14.737251281738281, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.13118461973423806, |
| "grad_norm": 83.8267776038953, |
| "learning_rate": 7.978852823512833e-07, |
| "logits/chosen": -1.1209564208984375, |
| "logits/rejected": -1.0898634195327759, |
| "logps/chosen": -1.447513461112976, |
| "logps/rejected": -1.5469727516174316, |
| "loss": 4.9365, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -14.475133895874023, |
| "rewards/margins": 0.9945943355560303, |
| "rewards/rejected": -15.469727516174316, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1334464235227594, |
| "grad_norm": 72.64611478190926, |
| "learning_rate": 7.975477736534957e-07, |
| "logits/chosen": -1.1025452613830566, |
| "logits/rejected": -1.109392523765564, |
| "logps/chosen": -1.4008920192718506, |
| "logps/rejected": -1.5584888458251953, |
| "loss": 4.51, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -14.00892162322998, |
| "rewards/margins": 1.5759669542312622, |
| "rewards/rejected": -15.584887504577637, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.13570822731128074, |
| "grad_norm": 66.85917896473248, |
| "learning_rate": 7.971853703190986e-07, |
| "logits/chosen": -1.0982723236083984, |
| "logits/rejected": -1.092232584953308, |
| "logps/chosen": -1.3805067539215088, |
| "logps/rejected": -1.521033763885498, |
| "loss": 4.5318, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -13.805068969726562, |
| "rewards/margins": 1.4052679538726807, |
| "rewards/rejected": -15.210335731506348, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1379700310998021, |
| "grad_norm": 53.210256153607055, |
| "learning_rate": 7.967980950419664e-07, |
| "logits/chosen": -1.0485178232192993, |
| "logits/rejected": -1.037397027015686, |
| "logps/chosen": -1.2965946197509766, |
| "logps/rejected": -1.442001223564148, |
| "loss": 4.4137, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -12.965946197509766, |
| "rewards/margins": 1.4540655612945557, |
| "rewards/rejected": -14.420013427734375, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.14023183488832344, |
| "grad_norm": 64.9229435001097, |
| "learning_rate": 7.963859720734669e-07, |
| "logits/chosen": -1.1149603128433228, |
| "logits/rejected": -1.1201238632202148, |
| "logps/chosen": -1.2722684144973755, |
| "logps/rejected": -1.4075822830200195, |
| "loss": 4.5023, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -12.722681999206543, |
| "rewards/margins": 1.3531394004821777, |
| "rewards/rejected": -14.075822830200195, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.14249363867684478, |
| "grad_norm": 45.43996335712389, |
| "learning_rate": 7.959490272209427e-07, |
| "logits/chosen": -1.1015686988830566, |
| "logits/rejected": -1.079529047012329, |
| "logps/chosen": -1.2755954265594482, |
| "logps/rejected": -1.476697564125061, |
| "loss": 4.1012, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -12.755952835083008, |
| "rewards/margins": 2.0110244750976562, |
| "rewards/rejected": -14.766977310180664, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.14475544246536612, |
| "grad_norm": 45.99686400612689, |
| "learning_rate": 7.954872878460946e-07, |
| "logits/chosen": -1.128149390220642, |
| "logits/rejected": -1.1002622842788696, |
| "logps/chosen": -1.3429501056671143, |
| "logps/rejected": -1.5083937644958496, |
| "loss": 4.2495, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -13.429499626159668, |
| "rewards/margins": 1.6544382572174072, |
| "rewards/rejected": -15.083937644958496, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.14701724625388748, |
| "grad_norm": 58.04494616379713, |
| "learning_rate": 7.950007828632691e-07, |
| "logits/chosen": -1.074033498764038, |
| "logits/rejected": -1.083252191543579, |
| "logps/chosen": -1.3732857704162598, |
| "logps/rejected": -1.602651834487915, |
| "loss": 4.1484, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -13.732858657836914, |
| "rewards/margins": 2.2936599254608154, |
| "rewards/rejected": -16.026517868041992, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.14927905004240882, |
| "grad_norm": 51.14410021137322, |
| "learning_rate": 7.944895427376465e-07, |
| "logits/chosen": -1.092671513557434, |
| "logits/rejected": -1.084201455116272, |
| "logps/chosen": -1.3794586658477783, |
| "logps/rejected": -1.6024820804595947, |
| "loss": 4.3135, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -13.794585227966309, |
| "rewards/margins": 2.2302355766296387, |
| "rewards/rejected": -16.024822235107422, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.15154085383093016, |
| "grad_norm": 36.03692322063, |
| "learning_rate": 7.939535994833345e-07, |
| "logits/chosen": -1.0611392259597778, |
| "logits/rejected": -1.0569102764129639, |
| "logps/chosen": -1.2717268466949463, |
| "logps/rejected": -1.485985517501831, |
| "loss": 4.2175, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -12.717269897460938, |
| "rewards/margins": 2.1425867080688477, |
| "rewards/rejected": -14.859856605529785, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1538026576194515, |
| "grad_norm": 65.23896138368357, |
| "learning_rate": 7.933929866613628e-07, |
| "logits/chosen": -1.0750938653945923, |
| "logits/rejected": -1.0748298168182373, |
| "logps/chosen": -1.3082906007766724, |
| "logps/rejected": -1.4252880811691284, |
| "loss": 4.5562, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -13.082904815673828, |
| "rewards/margins": 1.1699758768081665, |
| "rewards/rejected": -14.25288200378418, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.15606446140797287, |
| "grad_norm": 50.89066601450084, |
| "learning_rate": 7.928077393775808e-07, |
| "logits/chosen": -1.0674494504928589, |
| "logits/rejected": -1.0776114463806152, |
| "logps/chosen": -1.3522337675094604, |
| "logps/rejected": -1.6143879890441895, |
| "loss": 3.9809, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -13.522336959838867, |
| "rewards/margins": 2.621541738510132, |
| "rewards/rejected": -16.143878936767578, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1583262651964942, |
| "grad_norm": 126.95874714186452, |
| "learning_rate": 7.921978942804609e-07, |
| "logits/chosen": -1.0427839756011963, |
| "logits/rejected": -1.0468775033950806, |
| "logps/chosen": -1.3458898067474365, |
| "logps/rejected": -1.5510884523391724, |
| "loss": 4.1796, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -13.458898544311523, |
| "rewards/margins": 2.0519869327545166, |
| "rewards/rejected": -15.510884284973145, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16058806898501554, |
| "grad_norm": 55.63953868334949, |
| "learning_rate": 7.915634895588021e-07, |
| "logits/chosen": -1.0790458917617798, |
| "logits/rejected": -1.0586471557617188, |
| "logps/chosen": -1.4167431592941284, |
| "logps/rejected": -1.5353124141693115, |
| "loss": 4.7775, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -14.167430877685547, |
| "rewards/margins": 1.185691475868225, |
| "rewards/rejected": -15.35312271118164, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1628498727735369, |
| "grad_norm": 76.32984072024325, |
| "learning_rate": 7.909045649393394e-07, |
| "logits/chosen": -1.120489239692688, |
| "logits/rejected": -1.1161108016967773, |
| "logps/chosen": -1.3034402132034302, |
| "logps/rejected": -1.3763903379440308, |
| "loss": 4.7508, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -13.034402847290039, |
| "rewards/margins": 0.7294999361038208, |
| "rewards/rejected": -13.76390266418457, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.16511167656205825, |
| "grad_norm": 54.86426522118373, |
| "learning_rate": 7.902211616842556e-07, |
| "logits/chosen": -1.08669912815094, |
| "logits/rejected": -1.0823296308517456, |
| "logps/chosen": -1.3619005680084229, |
| "logps/rejected": -1.573593258857727, |
| "loss": 4.3494, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -13.61900520324707, |
| "rewards/margins": 2.1169278621673584, |
| "rewards/rejected": -15.735933303833008, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.16737348035057958, |
| "grad_norm": 58.4654706399478, |
| "learning_rate": 7.89513322588598e-07, |
| "logits/chosen": -1.0687835216522217, |
| "logits/rejected": -1.0629172325134277, |
| "logps/chosen": -1.3168197870254517, |
| "logps/rejected": -1.4600768089294434, |
| "loss": 4.1908, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -13.168198585510254, |
| "rewards/margins": 1.4325703382492065, |
| "rewards/rejected": -14.600768089294434, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.16963528413910092, |
| "grad_norm": 53.919564210580624, |
| "learning_rate": 7.887810919775976e-07, |
| "logits/chosen": -1.0242153406143188, |
| "logits/rejected": -1.0200350284576416, |
| "logps/chosen": -1.3797943592071533, |
| "logps/rejected": -1.5162783861160278, |
| "loss": 4.4817, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -13.797942161560059, |
| "rewards/margins": 1.3648402690887451, |
| "rewards/rejected": -15.162782669067383, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1718970879276223, |
| "grad_norm": 39.76484434551506, |
| "learning_rate": 7.880245157038949e-07, |
| "logits/chosen": -1.1036596298217773, |
| "logits/rejected": -1.0872161388397217, |
| "logps/chosen": -1.375118613243103, |
| "logps/rejected": -1.5439777374267578, |
| "loss": 4.3267, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -13.751185417175293, |
| "rewards/margins": 1.688592553138733, |
| "rewards/rejected": -15.439779281616211, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.17415889171614363, |
| "grad_norm": 74.50498370535034, |
| "learning_rate": 7.872436411446671e-07, |
| "logits/chosen": -1.1212602853775024, |
| "logits/rejected": -1.1364926099777222, |
| "logps/chosen": -1.4108389616012573, |
| "logps/rejected": -1.5181750059127808, |
| "loss": 4.8281, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -14.108390808105469, |
| "rewards/margins": 1.073359727859497, |
| "rewards/rejected": -15.18174934387207, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.17642069550466496, |
| "grad_norm": 44.774679343796215, |
| "learning_rate": 7.86438517198662e-07, |
| "logits/chosen": -1.0375410318374634, |
| "logits/rejected": -1.0335216522216797, |
| "logps/chosen": -1.3055564165115356, |
| "logps/rejected": -1.4685771465301514, |
| "loss": 4.3614, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -13.055564880371094, |
| "rewards/margins": 1.6302083730697632, |
| "rewards/rejected": -14.685771942138672, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1786824992931863, |
| "grad_norm": 37.906288042223274, |
| "learning_rate": 7.856091942831366e-07, |
| "logits/chosen": -1.0268999338150024, |
| "logits/rejected": -1.042458415031433, |
| "logps/chosen": -1.3059368133544922, |
| "logps/rejected": -1.482313871383667, |
| "loss": 4.5709, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -13.059368133544922, |
| "rewards/margins": 1.7637701034545898, |
| "rewards/rejected": -14.823138236999512, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.18094430308170767, |
| "grad_norm": 48.0205012678544, |
| "learning_rate": 7.847557243306982e-07, |
| "logits/chosen": -1.119336724281311, |
| "logits/rejected": -1.1097991466522217, |
| "logps/chosen": -1.3474864959716797, |
| "logps/rejected": -1.4953199625015259, |
| "loss": 4.4092, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -13.474865913391113, |
| "rewards/margins": 1.4783344268798828, |
| "rewards/rejected": -14.953200340270996, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.183206106870229, |
| "grad_norm": 69.7852307914101, |
| "learning_rate": 7.838781607860541e-07, |
| "logits/chosen": -1.0976037979125977, |
| "logits/rejected": -1.0965137481689453, |
| "logps/chosen": -1.3701703548431396, |
| "logps/rejected": -1.5155431032180786, |
| "loss": 4.1212, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -13.701703071594238, |
| "rewards/margins": 1.4537272453308105, |
| "rewards/rejected": -15.155430793762207, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.18546791065875035, |
| "grad_norm": 49.10698010184252, |
| "learning_rate": 7.82976558602664e-07, |
| "logits/chosen": -1.1253350973129272, |
| "logits/rejected": -1.1349576711654663, |
| "logps/chosen": -1.3088488578796387, |
| "logps/rejected": -1.435444712638855, |
| "loss": 4.4909, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -13.088489532470703, |
| "rewards/margins": 1.2659577131271362, |
| "rewards/rejected": -14.354446411132812, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.1877297144472717, |
| "grad_norm": 63.38808359107094, |
| "learning_rate": 7.820509742392988e-07, |
| "logits/chosen": -1.1047896146774292, |
| "logits/rejected": -1.1108447313308716, |
| "logps/chosen": -1.4216302633285522, |
| "logps/rejected": -1.5547611713409424, |
| "loss": 4.3087, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -14.216300964355469, |
| "rewards/margins": 1.3313111066818237, |
| "rewards/rejected": -15.547612190246582, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.18999151823579305, |
| "grad_norm": 67.89717567725671, |
| "learning_rate": 7.811014656565054e-07, |
| "logits/chosen": -1.1079522371292114, |
| "logits/rejected": -1.0807424783706665, |
| "logps/chosen": -1.342353105545044, |
| "logps/rejected": -1.6123462915420532, |
| "loss": 3.9214, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -13.423532485961914, |
| "rewards/margins": 2.6999294757843018, |
| "rewards/rejected": -16.12346076965332, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1922533220243144, |
| "grad_norm": 59.34836842935611, |
| "learning_rate": 7.801280923129773e-07, |
| "logits/chosen": -1.0938023328781128, |
| "logits/rejected": -1.0850962400436401, |
| "logps/chosen": -1.3702034950256348, |
| "logps/rejected": -1.4707939624786377, |
| "loss": 4.8057, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -13.702035903930664, |
| "rewards/margins": 1.0059046745300293, |
| "rewards/rejected": -14.707940101623535, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.19451512581283573, |
| "grad_norm": 63.12311188329623, |
| "learning_rate": 7.791309151618305e-07, |
| "logits/chosen": -1.1037566661834717, |
| "logits/rejected": -1.104620099067688, |
| "logps/chosen": -1.4397916793823242, |
| "logps/rejected": -1.5834070444107056, |
| "loss": 4.4345, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -14.397918701171875, |
| "rewards/margins": 1.4361515045166016, |
| "rewards/rejected": -15.834070205688477, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1967769296013571, |
| "grad_norm": 40.6034885299812, |
| "learning_rate": 7.781099966467874e-07, |
| "logits/chosen": -1.1172497272491455, |
| "logits/rejected": -1.1132512092590332, |
| "logps/chosen": -1.2978553771972656, |
| "logps/rejected": -1.4062166213989258, |
| "loss": 4.2868, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -12.978553771972656, |
| "rewards/margins": 1.0836100578308105, |
| "rewards/rejected": -14.062165260314941, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.19903873338987843, |
| "grad_norm": 82.98673857879416, |
| "learning_rate": 7.770654006982664e-07, |
| "logits/chosen": -1.1160707473754883, |
| "logits/rejected": -1.0892269611358643, |
| "logps/chosen": -1.4885170459747314, |
| "logps/rejected": -1.6167935132980347, |
| "loss": 4.7014, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -14.88516902923584, |
| "rewards/margins": 1.282766342163086, |
| "rewards/rejected": -16.16793441772461, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.20130053717839977, |
| "grad_norm": 63.02089340256696, |
| "learning_rate": 7.759971927293781e-07, |
| "logits/chosen": -1.1214509010314941, |
| "logits/rejected": -1.1107975244522095, |
| "logps/chosen": -1.3759891986846924, |
| "logps/rejected": -1.5045208930969238, |
| "loss": 4.603, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -13.759891510009766, |
| "rewards/margins": 1.2853155136108398, |
| "rewards/rejected": -15.045208930969238, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2035623409669211, |
| "grad_norm": 49.39733815100318, |
| "learning_rate": 7.749054396318297e-07, |
| "logits/chosen": -1.121274709701538, |
| "logits/rejected": -1.104023814201355, |
| "logps/chosen": -1.4384340047836304, |
| "logps/rejected": -1.5760951042175293, |
| "loss": 4.4771, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -14.38433837890625, |
| "rewards/margins": 1.3766124248504639, |
| "rewards/rejected": -15.76095199584961, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.20582414475544247, |
| "grad_norm": 85.82224666214906, |
| "learning_rate": 7.737902097717356e-07, |
| "logits/chosen": -1.0971518754959106, |
| "logits/rejected": -1.111463189125061, |
| "logps/chosen": -1.4152038097381592, |
| "logps/rejected": -1.6357877254486084, |
| "loss": 4.3825, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -14.15203857421875, |
| "rewards/margins": 2.2058396339416504, |
| "rewards/rejected": -16.357877731323242, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2080859485439638, |
| "grad_norm": 61.90199433280952, |
| "learning_rate": 7.726515729853367e-07, |
| "logits/chosen": -1.077009677886963, |
| "logits/rejected": -1.082908034324646, |
| "logps/chosen": -1.3816239833831787, |
| "logps/rejected": -1.4853577613830566, |
| "loss": 4.8364, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -13.816240310668945, |
| "rewards/margins": 1.037337303161621, |
| "rewards/rejected": -14.85357666015625, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.21034775233248515, |
| "grad_norm": 76.41393582381788, |
| "learning_rate": 7.714896005746272e-07, |
| "logits/chosen": -1.1236391067504883, |
| "logits/rejected": -1.117820143699646, |
| "logps/chosen": -1.3681647777557373, |
| "logps/rejected": -1.590654730796814, |
| "loss": 3.933, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -13.681647300720215, |
| "rewards/margins": 2.2248995304107666, |
| "rewards/rejected": -15.906549453735352, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.21260955612100652, |
| "grad_norm": 77.71996625500955, |
| "learning_rate": 7.703043653028896e-07, |
| "logits/chosen": -1.1559141874313354, |
| "logits/rejected": -1.1570885181427002, |
| "logps/chosen": -1.53440523147583, |
| "logps/rejected": -1.6532174348831177, |
| "loss": 4.6511, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -15.3440523147583, |
| "rewards/margins": 1.1881229877471924, |
| "rewards/rejected": -16.53217315673828, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.21487135990952785, |
| "grad_norm": 91.56807102764826, |
| "learning_rate": 7.690959413901379e-07, |
| "logits/chosen": -1.1174224615097046, |
| "logits/rejected": -1.1035161018371582, |
| "logps/chosen": -1.41274893283844, |
| "logps/rejected": -1.5510450601577759, |
| "loss": 4.3802, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -14.12748908996582, |
| "rewards/margins": 1.3829612731933594, |
| "rewards/rejected": -15.510449409484863, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2171331636980492, |
| "grad_norm": 67.88527134817444, |
| "learning_rate": 7.678644045084704e-07, |
| "logits/chosen": -1.0640089511871338, |
| "logits/rejected": -1.0812008380889893, |
| "logps/chosen": -1.365978479385376, |
| "logps/rejected": -1.5785081386566162, |
| "loss": 4.2054, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -13.659785270690918, |
| "rewards/margins": 2.125296115875244, |
| "rewards/rejected": -15.78508186340332, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.21939496748657053, |
| "grad_norm": 49.30327561622109, |
| "learning_rate": 7.666098317773308e-07, |
| "logits/chosen": -1.1126219034194946, |
| "logits/rejected": -1.1190177202224731, |
| "logps/chosen": -1.4845547676086426, |
| "logps/rejected": -1.6363489627838135, |
| "loss": 4.1985, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -14.84554672241211, |
| "rewards/margins": 1.517941951751709, |
| "rewards/rejected": -16.363489151000977, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2216567712750919, |
| "grad_norm": 69.91733204705244, |
| "learning_rate": 7.653323017586789e-07, |
| "logits/chosen": -1.140116572380066, |
| "logits/rejected": -1.1216413974761963, |
| "logps/chosen": -1.3246794939041138, |
| "logps/rejected": -1.4348188638687134, |
| "loss": 4.3538, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -13.246795654296875, |
| "rewards/margins": 1.101393699645996, |
| "rewards/rejected": -14.348188400268555, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.22391857506361323, |
| "grad_norm": 86.8761329888255, |
| "learning_rate": 7.640318944520711e-07, |
| "logits/chosen": -1.1340844631195068, |
| "logits/rejected": -1.1252467632293701, |
| "logps/chosen": -1.5068817138671875, |
| "logps/rejected": -1.6223900318145752, |
| "loss": 4.5239, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -15.068817138671875, |
| "rewards/margins": 1.1550840139389038, |
| "rewards/rejected": -16.223899841308594, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.22618037885213457, |
| "grad_norm": 66.95277093894374, |
| "learning_rate": 7.627086912896511e-07, |
| "logits/chosen": -1.0213607549667358, |
| "logits/rejected": -1.0508267879486084, |
| "logps/chosen": -1.3892170190811157, |
| "logps/rejected": -1.5458083152770996, |
| "loss": 4.1824, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -13.892169952392578, |
| "rewards/margins": 1.5659123659133911, |
| "rewards/rejected": -15.45808219909668, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2284421826406559, |
| "grad_norm": 58.53340577802912, |
| "learning_rate": 7.613627751310499e-07, |
| "logits/chosen": -1.1418393850326538, |
| "logits/rejected": -1.142421007156372, |
| "logps/chosen": -1.4400460720062256, |
| "logps/rejected": -1.584843635559082, |
| "loss": 4.061, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -14.40046215057373, |
| "rewards/margins": 1.4479742050170898, |
| "rewards/rejected": -15.84843635559082, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.23070398642917728, |
| "grad_norm": 89.18045407035784, |
| "learning_rate": 7.599942302581977e-07, |
| "logits/chosen": -1.1121330261230469, |
| "logits/rejected": -1.1174899339675903, |
| "logps/chosen": -1.4459974765777588, |
| "logps/rejected": -1.680160403251648, |
| "loss": 3.9842, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -14.45997428894043, |
| "rewards/margins": 2.341628313064575, |
| "rewards/rejected": -16.80160140991211, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.23296579021769862, |
| "grad_norm": 78.88418112061981, |
| "learning_rate": 7.586031423700457e-07, |
| "logits/chosen": -1.1099210977554321, |
| "logits/rejected": -1.1131954193115234, |
| "logps/chosen": -1.39324152469635, |
| "logps/rejected": -1.5408098697662354, |
| "loss": 4.4078, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -13.932414054870605, |
| "rewards/margins": 1.4756839275360107, |
| "rewards/rejected": -15.408098220825195, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.23522759400621995, |
| "grad_norm": 78.97593612106348, |
| "learning_rate": 7.571895985772e-07, |
| "logits/chosen": -1.0611519813537598, |
| "logits/rejected": -1.0723930597305298, |
| "logps/chosen": -1.397983431816101, |
| "logps/rejected": -1.64972722530365, |
| "loss": 3.7839, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -13.979835510253906, |
| "rewards/margins": 2.517435073852539, |
| "rewards/rejected": -16.497272491455078, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.23748939779474132, |
| "grad_norm": 79.0701709292454, |
| "learning_rate": 7.557536873964661e-07, |
| "logits/chosen": -1.1425997018814087, |
| "logits/rejected": -1.1292781829833984, |
| "logps/chosen": -1.6143113374710083, |
| "logps/rejected": -1.7253085374832153, |
| "loss": 4.753, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -16.143112182617188, |
| "rewards/margins": 1.1099728345870972, |
| "rewards/rejected": -17.253087997436523, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.23975120158326266, |
| "grad_norm": 70.3983101177752, |
| "learning_rate": 7.542954987453069e-07, |
| "logits/chosen": -1.1226975917816162, |
| "logits/rejected": -1.1262807846069336, |
| "logps/chosen": -1.510130763053894, |
| "logps/rejected": -1.6568247079849243, |
| "loss": 4.1598, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -15.10130786895752, |
| "rewards/margins": 1.46694016456604, |
| "rewards/rejected": -16.568248748779297, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.242013005371784, |
| "grad_norm": 63.96492528100626, |
| "learning_rate": 7.528151239362108e-07, |
| "logits/chosen": -1.1100159883499146, |
| "logits/rejected": -1.11635422706604, |
| "logps/chosen": -1.5105023384094238, |
| "logps/rejected": -1.7230298519134521, |
| "loss": 4.028, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -15.105022430419922, |
| "rewards/margins": 2.1252756118774414, |
| "rewards/rejected": -17.23029899597168, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.24427480916030533, |
| "grad_norm": 88.26352011162051, |
| "learning_rate": 7.513126556709748e-07, |
| "logits/chosen": -1.0872122049331665, |
| "logits/rejected": -1.0848742723464966, |
| "logps/chosen": -1.5043668746948242, |
| "logps/rejected": -1.8191593885421753, |
| "loss": 3.5723, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -15.04366683959961, |
| "rewards/margins": 3.147927761077881, |
| "rewards/rejected": -18.191593170166016, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.2465366129488267, |
| "grad_norm": 72.88913803824948, |
| "learning_rate": 7.497881880348984e-07, |
| "logits/chosen": -1.053697109222412, |
| "logits/rejected": -1.0525224208831787, |
| "logps/chosen": -1.5000402927398682, |
| "logps/rejected": -1.7036010026931763, |
| "loss": 3.9378, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -15.000402450561523, |
| "rewards/margins": 2.035606622695923, |
| "rewards/rejected": -17.0360107421875, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.24879841673734804, |
| "grad_norm": 112.83565654609102, |
| "learning_rate": 7.482418164908931e-07, |
| "logits/chosen": -1.0978425741195679, |
| "logits/rejected": -1.1009314060211182, |
| "logps/chosen": -1.6112879514694214, |
| "logps/rejected": -1.7323018312454224, |
| "loss": 4.4843, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -16.112878799438477, |
| "rewards/margins": 1.210138201713562, |
| "rewards/rejected": -17.323017120361328, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2510602205258694, |
| "grad_norm": 86.85626491737465, |
| "learning_rate": 7.466736378735035e-07, |
| "logits/chosen": -1.078384280204773, |
| "logits/rejected": -1.079357624053955, |
| "logps/chosen": -1.5716272592544556, |
| "logps/rejected": -1.7571252584457397, |
| "loss": 4.0243, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -15.716273307800293, |
| "rewards/margins": 1.8549789190292358, |
| "rewards/rejected": -17.571250915527344, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2533220243143907, |
| "grad_norm": 86.39434107888039, |
| "learning_rate": 7.450837503828439e-07, |
| "logits/chosen": -1.0666810274124146, |
| "logits/rejected": -1.0605652332305908, |
| "logps/chosen": -1.6759812831878662, |
| "logps/rejected": -1.9359164237976074, |
| "loss": 3.9933, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -16.75981330871582, |
| "rewards/margins": 2.599350690841675, |
| "rewards/rejected": -19.359163284301758, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.2555838281029121, |
| "grad_norm": 69.9794740418758, |
| "learning_rate": 7.43472253578449e-07, |
| "logits/chosen": -1.0734777450561523, |
| "logits/rejected": -1.0733259916305542, |
| "logps/chosen": -1.5143762826919556, |
| "logps/rejected": -1.7324830293655396, |
| "loss": 4.0494, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -15.143762588500977, |
| "rewards/margins": 2.181068181991577, |
| "rewards/rejected": -17.324831008911133, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.2578456318914334, |
| "grad_norm": 81.18585812420585, |
| "learning_rate": 7.418392483730389e-07, |
| "logits/chosen": -1.0903693437576294, |
| "logits/rejected": -1.0884432792663574, |
| "logps/chosen": -1.6420326232910156, |
| "logps/rejected": -1.8970825672149658, |
| "loss": 3.9537, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -16.420326232910156, |
| "rewards/margins": 2.5504982471466064, |
| "rewards/rejected": -18.9708251953125, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.26010743567995476, |
| "grad_norm": 79.65683808928875, |
| "learning_rate": 7.401848370262012e-07, |
| "logits/chosen": -1.125045657157898, |
| "logits/rejected": -1.1087158918380737, |
| "logps/chosen": -1.5817803144454956, |
| "logps/rejected": -1.7453052997589111, |
| "loss": 4.0128, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -15.817804336547852, |
| "rewards/margins": 1.635249376296997, |
| "rewards/rejected": -17.453052520751953, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2623692394684761, |
| "grad_norm": 82.03864787816394, |
| "learning_rate": 7.385091231379856e-07, |
| "logits/chosen": -1.109777808189392, |
| "logits/rejected": -1.1198936700820923, |
| "logps/chosen": -1.65959894657135, |
| "logps/rejected": -1.9032299518585205, |
| "loss": 3.9017, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -16.59598731994629, |
| "rewards/margins": 2.436310291290283, |
| "rewards/rejected": -19.032299041748047, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.26463104325699743, |
| "grad_norm": 90.60602549341097, |
| "learning_rate": 7.368122116424182e-07, |
| "logits/chosen": -1.065422773361206, |
| "logits/rejected": -1.0693196058273315, |
| "logps/chosen": -1.7014392614364624, |
| "logps/rejected": -1.907637119293213, |
| "loss": 4.0746, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -17.014392852783203, |
| "rewards/margins": 2.061978340148926, |
| "rewards/rejected": -19.076370239257812, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2668928470455188, |
| "grad_norm": 86.03329980337816, |
| "learning_rate": 7.350942088009289e-07, |
| "logits/chosen": -1.1139557361602783, |
| "logits/rejected": -1.1093213558197021, |
| "logps/chosen": -1.69358229637146, |
| "logps/rejected": -1.916198492050171, |
| "loss": 3.5838, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -16.935823440551758, |
| "rewards/margins": 2.2261621952056885, |
| "rewards/rejected": -19.161983489990234, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.26915465083404017, |
| "grad_norm": 121.7554160666809, |
| "learning_rate": 7.333552221956986e-07, |
| "logits/chosen": -1.2222692966461182, |
| "logits/rejected": -1.2024728059768677, |
| "logps/chosen": -1.8368546962738037, |
| "logps/rejected": -2.0985753536224365, |
| "loss": 4.0214, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -18.368549346923828, |
| "rewards/margins": 2.6172049045562744, |
| "rewards/rejected": -20.98575210571289, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2714164546225615, |
| "grad_norm": 97.83952989713862, |
| "learning_rate": 7.315953607229217e-07, |
| "logits/chosen": -1.109398365020752, |
| "logits/rejected": -1.1084152460098267, |
| "logps/chosen": -1.907912254333496, |
| "logps/rejected": -2.1674695014953613, |
| "loss": 3.7429, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -19.07912254333496, |
| "rewards/margins": 2.595571517944336, |
| "rewards/rejected": -21.674694061279297, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.27367825841108284, |
| "grad_norm": 105.07451107315077, |
| "learning_rate": 7.298147345859869e-07, |
| "logits/chosen": -1.111659049987793, |
| "logits/rejected": -1.1200050115585327, |
| "logps/chosen": -1.807603120803833, |
| "logps/rejected": -2.0426204204559326, |
| "loss": 3.8303, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -18.076032638549805, |
| "rewards/margins": 2.350172519683838, |
| "rewards/rejected": -20.426204681396484, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.2759400621996042, |
| "grad_norm": 128.53073399523066, |
| "learning_rate": 7.280134552885762e-07, |
| "logits/chosen": -1.1038322448730469, |
| "logits/rejected": -1.0986474752426147, |
| "logps/chosen": -1.919103980064392, |
| "logps/rejected": -2.1694791316986084, |
| "loss": 4.1521, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -19.1910400390625, |
| "rewards/margins": 2.5037519931793213, |
| "rewards/rejected": -21.694791793823242, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.2782018659881255, |
| "grad_norm": 124.69931513710597, |
| "learning_rate": 7.261916356276831e-07, |
| "logits/chosen": -1.0938494205474854, |
| "logits/rejected": -1.0867280960083008, |
| "logps/chosen": -1.9384150505065918, |
| "logps/rejected": -2.2772631645202637, |
| "loss": 3.0558, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -19.3841495513916, |
| "rewards/margins": 3.3884801864624023, |
| "rewards/rejected": -22.77263069152832, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2804636697766469, |
| "grad_norm": 112.01228179833498, |
| "learning_rate": 7.243493896865486e-07, |
| "logits/chosen": -1.0969176292419434, |
| "logits/rejected": -1.0823101997375488, |
| "logps/chosen": -1.8088492155075073, |
| "logps/rejected": -2.002607583999634, |
| "loss": 3.8824, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -18.088491439819336, |
| "rewards/margins": 1.9375840425491333, |
| "rewards/rejected": -20.02607536315918, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2827254735651682, |
| "grad_norm": 135.1348279059641, |
| "learning_rate": 7.224868328275169e-07, |
| "logits/chosen": -1.0725688934326172, |
| "logits/rejected": -1.06583833694458, |
| "logps/chosen": -1.9847272634506226, |
| "logps/rejected": -2.2174899578094482, |
| "loss": 4.0921, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -19.847272872924805, |
| "rewards/margins": 2.3276259899139404, |
| "rewards/rejected": -22.174898147583008, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.28498727735368956, |
| "grad_norm": 183.11859236868796, |
| "learning_rate": 7.206040816848126e-07, |
| "logits/chosen": -1.0952653884887695, |
| "logits/rejected": -1.0870287418365479, |
| "logps/chosen": -2.140598773956299, |
| "logps/rejected": -2.3416907787323, |
| "loss": 4.3437, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -21.405988693237305, |
| "rewards/margins": 2.010920524597168, |
| "rewards/rejected": -23.416908264160156, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2872490811422109, |
| "grad_norm": 163.26760277117015, |
| "learning_rate": 7.187012541572356e-07, |
| "logits/chosen": -1.124993085861206, |
| "logits/rejected": -1.126251459121704, |
| "logps/chosen": -2.2181339263916016, |
| "logps/rejected": -2.5058629512786865, |
| "loss": 3.9287, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -22.18134307861328, |
| "rewards/margins": 2.8772897720336914, |
| "rewards/rejected": -25.058629989624023, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.28951088493073224, |
| "grad_norm": 150.6861946732737, |
| "learning_rate": 7.167784694007791e-07, |
| "logits/chosen": -1.0672762393951416, |
| "logits/rejected": -1.076225757598877, |
| "logps/chosen": -2.1517903804779053, |
| "logps/rejected": -2.513075351715088, |
| "loss": 3.5207, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -21.517902374267578, |
| "rewards/margins": 3.61285138130188, |
| "rewards/rejected": -25.130752563476562, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2917726887192536, |
| "grad_norm": 166.1334881985271, |
| "learning_rate": 7.148358478211682e-07, |
| "logits/chosen": -1.0861376523971558, |
| "logits/rejected": -1.0827029943466187, |
| "logps/chosen": -2.336635112762451, |
| "logps/rejected": -2.585222005844116, |
| "loss": 3.9166, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -23.366352081298828, |
| "rewards/margins": 2.4858686923980713, |
| "rewards/rejected": -25.85222053527832, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.29403449250777497, |
| "grad_norm": 129.64078380147677, |
| "learning_rate": 7.128735110663187e-07, |
| "logits/chosen": -1.0773652791976929, |
| "logits/rejected": -1.0514805316925049, |
| "logps/chosen": -2.0527725219726562, |
| "logps/rejected": -2.4218080043792725, |
| "loss": 3.2006, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -20.527725219726562, |
| "rewards/margins": 3.690356969833374, |
| "rewards/rejected": -24.218082427978516, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 185.67538718660288, |
| "learning_rate": 7.108915820187211e-07, |
| "logits/chosen": -1.0431915521621704, |
| "logits/rejected": -1.0388939380645752, |
| "logps/chosen": -2.42039155960083, |
| "logps/rejected": -2.778228998184204, |
| "loss": 3.9305, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -24.203916549682617, |
| "rewards/margins": 3.5783729553222656, |
| "rewards/rejected": -27.782289505004883, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.29855810008481765, |
| "grad_norm": 189.37316298985056, |
| "learning_rate": 7.088901847877447e-07, |
| "logits/chosen": -1.0450592041015625, |
| "logits/rejected": -1.0333521366119385, |
| "logps/chosen": -2.2116613388061523, |
| "logps/rejected": -2.4226627349853516, |
| "loss": 4.5748, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -22.116615295410156, |
| "rewards/margins": 2.110013961791992, |
| "rewards/rejected": -24.226627349853516, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.300819903873339, |
| "grad_norm": 148.7215180863099, |
| "learning_rate": 7.068694447018658e-07, |
| "logits/chosen": -1.0752545595169067, |
| "logits/rejected": -1.0728555917739868, |
| "logps/chosen": -2.185119390487671, |
| "logps/rejected": -2.5360453128814697, |
| "loss": 3.5117, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -21.851194381713867, |
| "rewards/margins": 3.5092573165893555, |
| "rewards/rejected": -25.360450744628906, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.3030817076618603, |
| "grad_norm": 153.23013702965537, |
| "learning_rate": 7.048294883008199e-07, |
| "logits/chosen": -1.0524669885635376, |
| "logits/rejected": -1.052236795425415, |
| "logps/chosen": -2.0294673442840576, |
| "logps/rejected": -2.303767442703247, |
| "loss": 3.5238, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -20.294673919677734, |
| "rewards/margins": 2.7429981231689453, |
| "rewards/rejected": -23.037673950195312, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.3053435114503817, |
| "grad_norm": 144.28413679223638, |
| "learning_rate": 7.027704433276776e-07, |
| "logits/chosen": -1.0182456970214844, |
| "logits/rejected": -1.0239580869674683, |
| "logps/chosen": -2.133883237838745, |
| "logps/rejected": -2.4184799194335938, |
| "loss": 3.9025, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -21.33883285522461, |
| "rewards/margins": 2.8459646701812744, |
| "rewards/rejected": -24.184799194335938, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.307605315238903, |
| "grad_norm": 179.86153387997342, |
| "learning_rate": 7.006924387208452e-07, |
| "logits/chosen": -1.047975778579712, |
| "logits/rejected": -1.0314030647277832, |
| "logps/chosen": -1.9791343212127686, |
| "logps/rejected": -2.2267491817474365, |
| "loss": 3.8151, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -19.791345596313477, |
| "rewards/margins": 2.4761476516723633, |
| "rewards/rejected": -22.267492294311523, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.30986711902742436, |
| "grad_norm": 121.33379388556347, |
| "learning_rate": 6.985956046059904e-07, |
| "logits/chosen": -1.0230764150619507, |
| "logits/rejected": -1.0363925695419312, |
| "logps/chosen": -1.897845983505249, |
| "logps/rejected": -2.19516921043396, |
| "loss": 3.7707, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -18.978456497192383, |
| "rewards/margins": 2.973233938217163, |
| "rewards/rejected": -21.951692581176758, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.31212892281594573, |
| "grad_norm": 112.00748336876892, |
| "learning_rate": 6.964800722878945e-07, |
| "logits/chosen": -0.996837854385376, |
| "logits/rejected": -0.9921685457229614, |
| "logps/chosen": -1.9219303131103516, |
| "logps/rejected": -2.291210651397705, |
| "loss": 3.3089, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -19.219301223754883, |
| "rewards/margins": 3.692802906036377, |
| "rewards/rejected": -22.912105560302734, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.31439072660446704, |
| "grad_norm": 91.27352193919032, |
| "learning_rate": 6.943459742422287e-07, |
| "logits/chosen": -1.022385835647583, |
| "logits/rejected": -0.9965115189552307, |
| "logps/chosen": -1.897507667541504, |
| "logps/rejected": -2.216675043106079, |
| "loss": 3.4424, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -18.975078582763672, |
| "rewards/margins": 3.1916732788085938, |
| "rewards/rejected": -22.166751861572266, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.3166525303929884, |
| "grad_norm": 111.30209201573247, |
| "learning_rate": 6.921934441072597e-07, |
| "logits/chosen": -1.0870633125305176, |
| "logits/rejected": -1.074265480041504, |
| "logps/chosen": -2.0160953998565674, |
| "logps/rejected": -2.277557373046875, |
| "loss": 3.9955, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -20.16095542907715, |
| "rewards/margins": 2.6146204471588135, |
| "rewards/rejected": -22.77557373046875, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3189143341815098, |
| "grad_norm": 127.72259297295662, |
| "learning_rate": 6.900226166754807e-07, |
| "logits/chosen": -1.0401194095611572, |
| "logits/rejected": -1.0414453744888306, |
| "logps/chosen": -2.111356258392334, |
| "logps/rejected": -2.3192646503448486, |
| "loss": 4.2984, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -21.113563537597656, |
| "rewards/margins": 2.0790882110595703, |
| "rewards/rejected": -23.19264793395996, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3211761379700311, |
| "grad_norm": 109.41246623179207, |
| "learning_rate": 6.8783362788517e-07, |
| "logits/chosen": -1.041169285774231, |
| "logits/rejected": -1.0405552387237549, |
| "logps/chosen": -2.0705885887145996, |
| "logps/rejected": -2.3723855018615723, |
| "loss": 3.9914, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -20.70588493347168, |
| "rewards/margins": 3.017970561981201, |
| "rewards/rejected": -23.72385597229004, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.32343794175855245, |
| "grad_norm": 101.80108756422902, |
| "learning_rate": 6.856266148118796e-07, |
| "logits/chosen": -1.0391225814819336, |
| "logits/rejected": -1.0372495651245117, |
| "logps/chosen": -1.9241631031036377, |
| "logps/rejected": -2.2478513717651367, |
| "loss": 3.4991, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -19.24163246154785, |
| "rewards/margins": 3.2368831634521484, |
| "rewards/rejected": -22.478511810302734, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3256997455470738, |
| "grad_norm": 126.22301243213639, |
| "learning_rate": 6.834017156598512e-07, |
| "logits/chosen": -1.0187711715698242, |
| "logits/rejected": -1.0043439865112305, |
| "logps/chosen": -2.097262382507324, |
| "logps/rejected": -2.361314535140991, |
| "loss": 3.7219, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -20.972625732421875, |
| "rewards/margins": 2.6405200958251953, |
| "rewards/rejected": -23.61314582824707, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3279615493355951, |
| "grad_norm": 111.89551066310779, |
| "learning_rate": 6.811590697533607e-07, |
| "logits/chosen": -1.086721420288086, |
| "logits/rejected": -1.0971835851669312, |
| "logps/chosen": -2.035705089569092, |
| "logps/rejected": -2.3083982467651367, |
| "loss": 3.568, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -20.357051849365234, |
| "rewards/margins": 2.7269287109375, |
| "rewards/rejected": -23.083980560302734, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3302233531241165, |
| "grad_norm": 148.3108673788691, |
| "learning_rate": 6.788988175279951e-07, |
| "logits/chosen": -1.0467808246612549, |
| "logits/rejected": -1.028620958328247, |
| "logps/chosen": -2.1247239112854004, |
| "logps/rejected": -2.4022562503814697, |
| "loss": 3.9603, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -21.247238159179688, |
| "rewards/margins": 2.7753214836120605, |
| "rewards/rejected": -24.02256202697754, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.3324851569126378, |
| "grad_norm": 125.5433893020898, |
| "learning_rate": 6.766211005218577e-07, |
| "logits/chosen": -1.0311849117279053, |
| "logits/rejected": -1.0179343223571777, |
| "logps/chosen": -2.089470624923706, |
| "logps/rejected": -2.500403881072998, |
| "loss": 3.2191, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -20.894704818725586, |
| "rewards/margins": 4.109335422515869, |
| "rewards/rejected": -25.004039764404297, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.33474696070115917, |
| "grad_norm": 118.88936796279769, |
| "learning_rate": 6.743260613667047e-07, |
| "logits/chosen": -1.0592498779296875, |
| "logits/rejected": -1.045177698135376, |
| "logps/chosen": -2.1444954872131348, |
| "logps/rejected": -2.4342246055603027, |
| "loss": 3.7747, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -21.44495391845703, |
| "rewards/margins": 2.8972933292388916, |
| "rewards/rejected": -24.342247009277344, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.33700876448968053, |
| "grad_norm": 96.48894963535292, |
| "learning_rate": 6.720138437790139e-07, |
| "logits/chosen": -1.0325779914855957, |
| "logits/rejected": -1.0183120965957642, |
| "logps/chosen": -2.072295665740967, |
| "logps/rejected": -2.4342474937438965, |
| "loss": 3.1761, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -20.72295570373535, |
| "rewards/margins": 3.6195178031921387, |
| "rewards/rejected": -24.34247398376465, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.33927056827820185, |
| "grad_norm": 153.98600360046012, |
| "learning_rate": 6.696845925509848e-07, |
| "logits/chosen": -1.0699944496154785, |
| "logits/rejected": -1.0510475635528564, |
| "logps/chosen": -2.2458243370056152, |
| "logps/rejected": -2.5218453407287598, |
| "loss": 3.7126, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -22.458240509033203, |
| "rewards/margins": 2.7602086067199707, |
| "rewards/rejected": -25.21845245361328, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3415323720667232, |
| "grad_norm": 131.38558339494207, |
| "learning_rate": 6.673384535414718e-07, |
| "logits/chosen": -1.0725032091140747, |
| "logits/rejected": -1.0490397214889526, |
| "logps/chosen": -2.4079201221466064, |
| "logps/rejected": -2.6393277645111084, |
| "loss": 4.1844, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -24.079200744628906, |
| "rewards/margins": 2.3140788078308105, |
| "rewards/rejected": -26.39327621459961, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3437941758552446, |
| "grad_norm": 117.04940042869278, |
| "learning_rate": 6.649755736668511e-07, |
| "logits/chosen": -0.9817408323287964, |
| "logits/rejected": -0.9708501100540161, |
| "logps/chosen": -2.151355743408203, |
| "logps/rejected": -2.484079599380493, |
| "loss": 3.3881, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -21.51355743408203, |
| "rewards/margins": 3.327239513397217, |
| "rewards/rejected": -24.840797424316406, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3460559796437659, |
| "grad_norm": 117.23212029570279, |
| "learning_rate": 6.625961008918192e-07, |
| "logits/chosen": -1.007891297340393, |
| "logits/rejected": -0.9990096092224121, |
| "logps/chosen": -2.211634397506714, |
| "logps/rejected": -2.586132049560547, |
| "loss": 3.0739, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -22.116344451904297, |
| "rewards/margins": 3.744976043701172, |
| "rewards/rejected": -25.86132049560547, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.34831778343228725, |
| "grad_norm": 123.97516922142196, |
| "learning_rate": 6.602001842201289e-07, |
| "logits/chosen": -1.0259909629821777, |
| "logits/rejected": -1.0294607877731323, |
| "logps/chosen": -2.1726152896881104, |
| "logps/rejected": -2.435748338699341, |
| "loss": 3.9322, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -21.726154327392578, |
| "rewards/margins": 2.6313281059265137, |
| "rewards/rejected": -24.357481002807617, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3505795872208086, |
| "grad_norm": 122.24528395830801, |
| "learning_rate": 6.577879736852571e-07, |
| "logits/chosen": -1.0417340993881226, |
| "logits/rejected": -1.0337560176849365, |
| "logps/chosen": -2.2181789875030518, |
| "logps/rejected": -2.4941534996032715, |
| "loss": 3.6841, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -22.181791305541992, |
| "rewards/margins": 2.75974440574646, |
| "rewards/rejected": -24.94153594970703, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.35284139100932993, |
| "grad_norm": 97.83947309290298, |
| "learning_rate": 6.553596203410112e-07, |
| "logits/chosen": -1.0390684604644775, |
| "logits/rejected": -1.0227370262145996, |
| "logps/chosen": -2.2328336238861084, |
| "logps/rejected": -2.7284092903137207, |
| "loss": 2.639, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -22.32833480834961, |
| "rewards/margins": 4.9557576179504395, |
| "rewards/rejected": -27.284093856811523, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3551031947978513, |
| "grad_norm": 128.64043273189782, |
| "learning_rate": 6.529152762520688e-07, |
| "logits/chosen": -1.0631431341171265, |
| "logits/rejected": -1.0382766723632812, |
| "logps/chosen": -2.2645809650421143, |
| "logps/rejected": -2.6104514598846436, |
| "loss": 3.3993, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -22.645809173583984, |
| "rewards/margins": 3.4587042331695557, |
| "rewards/rejected": -26.104515075683594, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3573649985863726, |
| "grad_norm": 142.03788600510083, |
| "learning_rate": 6.504550944844558e-07, |
| "logits/chosen": -1.0133343935012817, |
| "logits/rejected": -1.0211207866668701, |
| "logps/chosen": -2.2236688137054443, |
| "logps/rejected": -2.5752692222595215, |
| "loss": 3.4598, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -22.2366886138916, |
| "rewards/margins": 3.5160036087036133, |
| "rewards/rejected": -25.7526912689209, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.359626802374894, |
| "grad_norm": 123.13104037411107, |
| "learning_rate": 6.479792290959613e-07, |
| "logits/chosen": -1.023786187171936, |
| "logits/rejected": -1.0143035650253296, |
| "logps/chosen": -2.2663955688476562, |
| "logps/rejected": -2.7522597312927246, |
| "loss": 3.1837, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -22.663957595825195, |
| "rewards/margins": 4.858642101287842, |
| "rewards/rejected": -27.52259635925293, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.36188860616341534, |
| "grad_norm": 116.507236487475, |
| "learning_rate": 6.454878351264906e-07, |
| "logits/chosen": -0.9985541105270386, |
| "logits/rejected": -0.9835253357887268, |
| "logps/chosen": -2.197329044342041, |
| "logps/rejected": -2.5955209732055664, |
| "loss": 3.1949, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -21.973289489746094, |
| "rewards/margins": 3.981919527053833, |
| "rewards/rejected": -25.955209732055664, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36415040995193665, |
| "grad_norm": 140.3290000030199, |
| "learning_rate": 6.429810685883565e-07, |
| "logits/chosen": -1.025122880935669, |
| "logits/rejected": -1.015618085861206, |
| "logps/chosen": -2.3687376976013184, |
| "logps/rejected": -2.7061593532562256, |
| "loss": 3.458, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -23.687376022338867, |
| "rewards/margins": 3.3742165565490723, |
| "rewards/rejected": -27.06159210205078, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.366412213740458, |
| "grad_norm": 165.90557536016226, |
| "learning_rate": 6.404590864565088e-07, |
| "logits/chosen": -0.9796350002288818, |
| "logits/rejected": -0.9656831622123718, |
| "logps/chosen": -2.3296120166778564, |
| "logps/rejected": -2.513373851776123, |
| "loss": 4.2251, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -23.296117782592773, |
| "rewards/margins": 1.8376156091690063, |
| "rewards/rejected": -25.133737564086914, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3686740175289794, |
| "grad_norm": 145.66517770093515, |
| "learning_rate": 6.379220466587063e-07, |
| "logits/chosen": -1.0169918537139893, |
| "logits/rejected": -0.9867813587188721, |
| "logps/chosen": -2.2477433681488037, |
| "logps/rejected": -2.6195108890533447, |
| "loss": 3.5149, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -22.477432250976562, |
| "rewards/margins": 3.717676877975464, |
| "rewards/rejected": -26.195110321044922, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3709358213175007, |
| "grad_norm": 144.08044730930845, |
| "learning_rate": 6.353701080656254e-07, |
| "logits/chosen": -0.9822530150413513, |
| "logits/rejected": -0.9942737221717834, |
| "logps/chosen": -2.3591365814208984, |
| "logps/rejected": -2.7208523750305176, |
| "loss": 3.378, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -23.591367721557617, |
| "rewards/margins": 3.6171576976776123, |
| "rewards/rejected": -27.208526611328125, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.37319762510602206, |
| "grad_norm": 151.33264705138873, |
| "learning_rate": 6.32803430480913e-07, |
| "logits/chosen": -0.9973443746566772, |
| "logits/rejected": -0.9848815202713013, |
| "logps/chosen": -2.3127224445343018, |
| "logps/rejected": -2.695197582244873, |
| "loss": 3.5003, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -23.127225875854492, |
| "rewards/margins": 3.82474946975708, |
| "rewards/rejected": -26.951976776123047, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3754594288945434, |
| "grad_norm": 159.23717011250747, |
| "learning_rate": 6.302221746311782e-07, |
| "logits/chosen": -1.0382288694381714, |
| "logits/rejected": -1.0025843381881714, |
| "logps/chosen": -2.2013797760009766, |
| "logps/rejected": -2.51949405670166, |
| "loss": 4.0404, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -22.013797760009766, |
| "rewards/margins": 3.1811418533325195, |
| "rewards/rejected": -25.19493865966797, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.37772123268306473, |
| "grad_norm": 136.63350908805106, |
| "learning_rate": 6.276265021559288e-07, |
| "logits/chosen": -1.0501428842544556, |
| "logits/rejected": -1.0255956649780273, |
| "logps/chosen": -2.2914161682128906, |
| "logps/rejected": -2.5851945877075195, |
| "loss": 3.837, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -22.914161682128906, |
| "rewards/margins": 2.9377853870391846, |
| "rewards/rejected": -25.851943969726562, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3799830364715861, |
| "grad_norm": 106.76290330410582, |
| "learning_rate": 6.250165755974487e-07, |
| "logits/chosen": -0.973567008972168, |
| "logits/rejected": -0.9691902995109558, |
| "logps/chosen": -2.191209316253662, |
| "logps/rejected": -2.55265212059021, |
| "loss": 3.2144, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -21.912094116210938, |
| "rewards/margins": 3.6144275665283203, |
| "rewards/rejected": -25.52652359008789, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3822448402601074, |
| "grad_norm": 123.0165341553547, |
| "learning_rate": 6.223925583906192e-07, |
| "logits/chosen": -1.0723838806152344, |
| "logits/rejected": -1.068512201309204, |
| "logps/chosen": -2.2144222259521484, |
| "logps/rejected": -2.5826680660247803, |
| "loss": 3.2022, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -22.144222259521484, |
| "rewards/margins": 3.682457208633423, |
| "rewards/rejected": -25.826679229736328, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3845066440486288, |
| "grad_norm": 106.9659885956079, |
| "learning_rate": 6.19754614852685e-07, |
| "logits/chosen": -1.057454228401184, |
| "logits/rejected": -1.0504707098007202, |
| "logps/chosen": -2.0870509147644043, |
| "logps/rejected": -2.4082977771759033, |
| "loss": 3.341, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -20.870508193969727, |
| "rewards/margins": 3.212470531463623, |
| "rewards/rejected": -24.08298110961914, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.38676844783715014, |
| "grad_norm": 116.69942583640601, |
| "learning_rate": 6.171029101729644e-07, |
| "logits/chosen": -1.0107990503311157, |
| "logits/rejected": -0.997688353061676, |
| "logps/chosen": -2.2821381092071533, |
| "logps/rejected": -2.6844727993011475, |
| "loss": 3.3542, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -22.821382522583008, |
| "rewards/margins": 4.023346900939941, |
| "rewards/rejected": -26.8447322845459, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.38903025162567145, |
| "grad_norm": 112.64754228209283, |
| "learning_rate": 6.144376104025055e-07, |
| "logits/chosen": -1.078917145729065, |
| "logits/rejected": -1.0517069101333618, |
| "logps/chosen": -2.097365379333496, |
| "logps/rejected": -2.4820194244384766, |
| "loss": 3.0937, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -20.97365379333496, |
| "rewards/margins": 3.8465404510498047, |
| "rewards/rejected": -24.820194244384766, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3912920554141928, |
| "grad_norm": 110.79442729953392, |
| "learning_rate": 6.117588824436873e-07, |
| "logits/chosen": -1.0570931434631348, |
| "logits/rejected": -1.0704282522201538, |
| "logps/chosen": -2.193882703781128, |
| "logps/rejected": -2.4734573364257812, |
| "loss": 3.9527, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -21.938825607299805, |
| "rewards/margins": 2.795746326446533, |
| "rewards/rejected": -24.73457145690918, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3935538592027142, |
| "grad_norm": 137.95490469520266, |
| "learning_rate": 6.090668940397688e-07, |
| "logits/chosen": -1.0477267503738403, |
| "logits/rejected": -1.0384645462036133, |
| "logps/chosen": -2.2047643661499023, |
| "logps/rejected": -2.585783004760742, |
| "loss": 3.3294, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -22.047645568847656, |
| "rewards/margins": 3.810184955596924, |
| "rewards/rejected": -25.857830047607422, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3958156629912355, |
| "grad_norm": 127.29644669869833, |
| "learning_rate": 6.063618137643844e-07, |
| "logits/chosen": -1.022063970565796, |
| "logits/rejected": -1.0069043636322021, |
| "logps/chosen": -2.243839740753174, |
| "logps/rejected": -2.6155033111572266, |
| "loss": 3.4077, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -22.438400268554688, |
| "rewards/margins": 3.7166357040405273, |
| "rewards/rejected": -26.1550350189209, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.39807746677975686, |
| "grad_norm": 135.11493494004503, |
| "learning_rate": 6.03643811010988e-07, |
| "logits/chosen": -1.0559251308441162, |
| "logits/rejected": -1.063502311706543, |
| "logps/chosen": -2.3829030990600586, |
| "logps/rejected": -2.7540674209594727, |
| "loss": 3.177, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -23.829029083251953, |
| "rewards/margins": 3.7116451263427734, |
| "rewards/rejected": -27.540672302246094, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.4003392705682782, |
| "grad_norm": 140.7638348318017, |
| "learning_rate": 6.009130559822453e-07, |
| "logits/chosen": -1.0719250440597534, |
| "logits/rejected": -1.0558902025222778, |
| "logps/chosen": -2.440748929977417, |
| "logps/rejected": -2.732268810272217, |
| "loss": 3.8044, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -24.407489776611328, |
| "rewards/margins": 2.9151973724365234, |
| "rewards/rejected": -27.322690963745117, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.40260107435679954, |
| "grad_norm": 145.03797185456023, |
| "learning_rate": 5.981697196793758e-07, |
| "logits/chosen": -1.0876305103302002, |
| "logits/rejected": -1.0896273851394653, |
| "logps/chosen": -2.5427262783050537, |
| "logps/rejected": -2.8704357147216797, |
| "loss": 3.2689, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -25.427263259887695, |
| "rewards/margins": 3.2770934104919434, |
| "rewards/rejected": -28.704357147216797, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.4048628781453209, |
| "grad_norm": 142.03805172849428, |
| "learning_rate": 5.954139738914446e-07, |
| "logits/chosen": -1.064598560333252, |
| "logits/rejected": -1.0695425271987915, |
| "logps/chosen": -2.6064581871032715, |
| "logps/rejected": -2.963006019592285, |
| "loss": 3.7176, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -26.064579010009766, |
| "rewards/margins": 3.565481662750244, |
| "rewards/rejected": -29.630062103271484, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.4071246819338422, |
| "grad_norm": 155.86905489974896, |
| "learning_rate": 5.92645991184605e-07, |
| "logits/chosen": -1.0406618118286133, |
| "logits/rejected": -1.022411584854126, |
| "logps/chosen": -2.7639245986938477, |
| "logps/rejected": -3.189260959625244, |
| "loss": 3.0644, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -27.639245986938477, |
| "rewards/margins": 4.253364562988281, |
| "rewards/rejected": -31.892608642578125, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4093864857223636, |
| "grad_norm": 146.00773077090489, |
| "learning_rate": 5.898659448912917e-07, |
| "logits/chosen": -1.0187329053878784, |
| "logits/rejected": -1.0266443490982056, |
| "logps/chosen": -2.6189653873443604, |
| "logps/rejected": -3.0183205604553223, |
| "loss": 3.6161, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -26.18965721130371, |
| "rewards/margins": 3.9935495853424072, |
| "rewards/rejected": -30.183202743530273, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.41164828951088495, |
| "grad_norm": 143.88940929849588, |
| "learning_rate": 5.870740090993676e-07, |
| "logits/chosen": -1.054551124572754, |
| "logits/rejected": -1.0553083419799805, |
| "logps/chosen": -2.855097532272339, |
| "logps/rejected": -3.358206033706665, |
| "loss": 2.9768, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -28.550975799560547, |
| "rewards/margins": 5.031084060668945, |
| "rewards/rejected": -33.582061767578125, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.41391009329940626, |
| "grad_norm": 148.85713065388646, |
| "learning_rate": 5.842703586412214e-07, |
| "logits/chosen": -1.053299069404602, |
| "logits/rejected": -1.0449230670928955, |
| "logps/chosen": -2.9110286235809326, |
| "logps/rejected": -3.206702470779419, |
| "loss": 4.0637, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -29.110288619995117, |
| "rewards/margins": 2.956738233566284, |
| "rewards/rejected": -32.06702423095703, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4161718970879276, |
| "grad_norm": 167.29318192619908, |
| "learning_rate": 5.814551690828203e-07, |
| "logits/chosen": -1.0455509424209595, |
| "logits/rejected": -1.0376818180084229, |
| "logps/chosen": -2.828270673751831, |
| "logps/rejected": -3.220689296722412, |
| "loss": 3.2985, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -28.28270721435547, |
| "rewards/margins": 3.924184560775757, |
| "rewards/rejected": -32.20689010620117, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.418433700876449, |
| "grad_norm": 206.27636903137858, |
| "learning_rate": 5.786286167127155e-07, |
| "logits/chosen": -1.0653034448623657, |
| "logits/rejected": -1.057640552520752, |
| "logps/chosen": -2.7050979137420654, |
| "logps/rejected": -3.1350555419921875, |
| "loss": 3.1992, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -27.05097770690918, |
| "rewards/margins": 4.299577236175537, |
| "rewards/rejected": -31.350555419921875, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4206955046649703, |
| "grad_norm": 143.90302309104877, |
| "learning_rate": 5.757908785310031e-07, |
| "logits/chosen": -1.033769130706787, |
| "logits/rejected": -1.0227222442626953, |
| "logps/chosen": -2.5687739849090576, |
| "logps/rejected": -2.9693543910980225, |
| "loss": 3.4184, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -25.687742233276367, |
| "rewards/margins": 4.005804061889648, |
| "rewards/rejected": -29.69354248046875, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.42295730845349166, |
| "grad_norm": 131.295957540962, |
| "learning_rate": 5.729421322382399e-07, |
| "logits/chosen": -1.0499193668365479, |
| "logits/rejected": -1.060524344444275, |
| "logps/chosen": -2.4598476886749268, |
| "logps/rejected": -2.8906612396240234, |
| "loss": 3.3257, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -24.598472595214844, |
| "rewards/margins": 4.3081374168396, |
| "rewards/rejected": -28.906612396240234, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.42521911224201303, |
| "grad_norm": 136.38634729167853, |
| "learning_rate": 5.700825562243163e-07, |
| "logits/chosen": -1.0197397470474243, |
| "logits/rejected": -1.012385368347168, |
| "logps/chosen": -2.5251848697662354, |
| "logps/rejected": -2.9892866611480713, |
| "loss": 2.9243, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -25.251850128173828, |
| "rewards/margins": 4.641017436981201, |
| "rewards/rejected": -29.89286994934082, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.42748091603053434, |
| "grad_norm": 141.76629611664433, |
| "learning_rate": 5.672123295572854e-07, |
| "logits/chosen": -1.0829524993896484, |
| "logits/rejected": -1.0842368602752686, |
| "logps/chosen": -2.5519356727600098, |
| "logps/rejected": -2.897853136062622, |
| "loss": 2.8895, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -25.519359588623047, |
| "rewards/margins": 3.4591751098632812, |
| "rewards/rejected": -28.978532791137695, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4297427198190557, |
| "grad_norm": 156.54079418478554, |
| "learning_rate": 5.643316319721487e-07, |
| "logits/chosen": -1.044749140739441, |
| "logits/rejected": -1.0437251329421997, |
| "logps/chosen": -2.7124738693237305, |
| "logps/rejected": -3.0288543701171875, |
| "loss": 3.8523, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -27.124740600585938, |
| "rewards/margins": 3.163804292678833, |
| "rewards/rejected": -30.288543701171875, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.432004523607577, |
| "grad_norm": 152.22283290197944, |
| "learning_rate": 5.614406438596026e-07, |
| "logits/chosen": -1.0822639465332031, |
| "logits/rejected": -1.0667061805725098, |
| "logps/chosen": -2.801711320877075, |
| "logps/rejected": -3.1665806770324707, |
| "loss": 3.4512, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -28.017114639282227, |
| "rewards/margins": 3.6486923694610596, |
| "rewards/rejected": -31.665807723999023, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.4342663273960984, |
| "grad_norm": 152.11043222529125, |
| "learning_rate": 5.585395462547406e-07, |
| "logits/chosen": -1.0402151346206665, |
| "logits/rejected": -1.0291682481765747, |
| "logps/chosen": -2.7880678176879883, |
| "logps/rejected": -3.064667224884033, |
| "loss": 3.7747, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -27.88067626953125, |
| "rewards/margins": 2.765998363494873, |
| "rewards/rejected": -30.64667510986328, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.43652813118461975, |
| "grad_norm": 142.00119985201334, |
| "learning_rate": 5.55628520825718e-07, |
| "logits/chosen": -1.0989923477172852, |
| "logits/rejected": -1.0788824558258057, |
| "logps/chosen": -2.6404459476470947, |
| "logps/rejected": -3.0510411262512207, |
| "loss": 2.9318, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -26.40445899963379, |
| "rewards/margins": 4.105955123901367, |
| "rewards/rejected": -30.510412216186523, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.43878993497314106, |
| "grad_norm": 139.7479873967227, |
| "learning_rate": 5.527077498623752e-07, |
| "logits/chosen": -1.0548537969589233, |
| "logits/rejected": -1.0540835857391357, |
| "logps/chosen": -2.691882610321045, |
| "logps/rejected": -3.0502240657806396, |
| "loss": 3.6926, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -26.9188289642334, |
| "rewards/margins": 3.5834131240844727, |
| "rewards/rejected": -30.502239227294922, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.4410517387616624, |
| "grad_norm": 130.61284433987367, |
| "learning_rate": 5.497774162648228e-07, |
| "logits/chosen": -1.0161868333816528, |
| "logits/rejected": -1.0190304517745972, |
| "logps/chosen": -2.542428493499756, |
| "logps/rejected": -3.0524849891662598, |
| "loss": 3.1532, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -25.42428207397461, |
| "rewards/margins": 5.100566387176514, |
| "rewards/rejected": -30.52484893798828, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.4433135425501838, |
| "grad_norm": 135.14144066906175, |
| "learning_rate": 5.468377035319882e-07, |
| "logits/chosen": -1.0638034343719482, |
| "logits/rejected": -1.0532487630844116, |
| "logps/chosen": -2.6114158630371094, |
| "logps/rejected": -3.115168809890747, |
| "loss": 3.2609, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -26.114160537719727, |
| "rewards/margins": 5.037529468536377, |
| "rewards/rejected": -31.151687622070312, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.4455753463387051, |
| "grad_norm": 148.720911284201, |
| "learning_rate": 5.438887957501248e-07, |
| "logits/chosen": -0.9781689047813416, |
| "logits/rejected": -0.9678754806518555, |
| "logps/chosen": -2.449193239212036, |
| "logps/rejected": -2.8158748149871826, |
| "loss": 3.6472, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -24.491931915283203, |
| "rewards/margins": 3.6668155193328857, |
| "rewards/rejected": -28.15874671936035, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.44783715012722647, |
| "grad_norm": 135.90145162820608, |
| "learning_rate": 5.409308775812844e-07, |
| "logits/chosen": -1.0223724842071533, |
| "logits/rejected": -1.0113955736160278, |
| "logps/chosen": -2.5878636837005615, |
| "logps/rejected": -2.9643242359161377, |
| "loss": 3.6913, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -25.87863540649414, |
| "rewards/margins": 3.7646050453186035, |
| "rewards/rejected": -29.643238067626953, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.45009895391574783, |
| "grad_norm": 128.2704382853074, |
| "learning_rate": 5.379641342517541e-07, |
| "logits/chosen": -1.0288407802581787, |
| "logits/rejected": -1.0315312147140503, |
| "logps/chosen": -2.346052646636963, |
| "logps/rejected": -2.8313674926757812, |
| "loss": 3.1107, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -23.460527420043945, |
| "rewards/margins": 4.853147983551025, |
| "rewards/rejected": -28.313674926757812, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.45236075770426915, |
| "grad_norm": 138.9294365697912, |
| "learning_rate": 5.349887515404564e-07, |
| "logits/chosen": -1.008575677871704, |
| "logits/rejected": -1.0214340686798096, |
| "logps/chosen": -2.542800188064575, |
| "logps/rejected": -3.029541015625, |
| "loss": 3.1166, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -25.428003311157227, |
| "rewards/margins": 4.867405891418457, |
| "rewards/rejected": -30.29541015625, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4546225614927905, |
| "grad_norm": 117.1329832477415, |
| "learning_rate": 5.320049157673163e-07, |
| "logits/chosen": -0.9712215662002563, |
| "logits/rejected": -0.9573394656181335, |
| "logps/chosen": -2.3807926177978516, |
| "logps/rejected": -2.774594783782959, |
| "loss": 3.1467, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -23.807926177978516, |
| "rewards/margins": 3.938020706176758, |
| "rewards/rejected": -27.745946884155273, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4568843652813118, |
| "grad_norm": 141.22701148173041, |
| "learning_rate": 5.290128137815938e-07, |
| "logits/chosen": -1.0210527181625366, |
| "logits/rejected": -1.0222499370574951, |
| "logps/chosen": -2.4406933784484863, |
| "logps/rejected": -2.9088692665100098, |
| "loss": 2.8153, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -24.40693473815918, |
| "rewards/margins": 4.681759357452393, |
| "rewards/rejected": -29.088693618774414, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4591461690698332, |
| "grad_norm": 104.57362015283528, |
| "learning_rate": 5.260126329501828e-07, |
| "logits/chosen": -1.066743016242981, |
| "logits/rejected": -1.0438530445098877, |
| "logps/chosen": -2.399411201477051, |
| "logps/rejected": -2.9573822021484375, |
| "loss": 2.6011, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -23.99411392211914, |
| "rewards/margins": 5.579708576202393, |
| "rewards/rejected": -29.573822021484375, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.46140797285835455, |
| "grad_norm": 143.5907755591019, |
| "learning_rate": 5.230045611458789e-07, |
| "logits/chosen": -0.9814115762710571, |
| "logits/rejected": -0.9883652925491333, |
| "logps/chosen": -2.3635404109954834, |
| "logps/rejected": -2.725175380706787, |
| "loss": 3.4507, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -23.635404586791992, |
| "rewards/margins": 3.6163482666015625, |
| "rewards/rejected": -27.251752853393555, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.46366977664687586, |
| "grad_norm": 132.51043793956725, |
| "learning_rate": 5.199887867356143e-07, |
| "logits/chosen": -0.9847227334976196, |
| "logits/rejected": -0.9898078441619873, |
| "logps/chosen": -2.5268359184265137, |
| "logps/rejected": -2.994638442993164, |
| "loss": 3.0621, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -25.268360137939453, |
| "rewards/margins": 4.678021430969238, |
| "rewards/rejected": -29.946386337280273, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.46593158043539723, |
| "grad_norm": 118.9977147061269, |
| "learning_rate": 5.16965498568662e-07, |
| "logits/chosen": -1.0224330425262451, |
| "logits/rejected": -0.9982988834381104, |
| "logps/chosen": -2.616161584854126, |
| "logps/rejected": -3.237916946411133, |
| "loss": 2.9049, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -26.1616153717041, |
| "rewards/margins": 6.217552661895752, |
| "rewards/rejected": -32.37916946411133, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.4681933842239186, |
| "grad_norm": 143.69066684123075, |
| "learning_rate": 5.139348859648098e-07, |
| "logits/chosen": -1.036367654800415, |
| "logits/rejected": -1.0168451070785522, |
| "logps/chosen": -2.4392778873443604, |
| "logps/rejected": -2.870711088180542, |
| "loss": 3.0652, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -24.39278221130371, |
| "rewards/margins": 4.314330101013184, |
| "rewards/rejected": -28.707111358642578, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.4704551880124399, |
| "grad_norm": 140.65993366550816, |
| "learning_rate": 5.10897138702506e-07, |
| "logits/chosen": -0.9891340732574463, |
| "logits/rejected": -0.9894376993179321, |
| "logps/chosen": -2.548025608062744, |
| "logps/rejected": -2.9896531105041504, |
| "loss": 3.5331, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -25.48025894165039, |
| "rewards/margins": 4.4162702560424805, |
| "rewards/rejected": -29.896530151367188, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4727169918009613, |
| "grad_norm": 149.36997254654412, |
| "learning_rate": 5.078524470069743e-07, |
| "logits/chosen": -1.0524518489837646, |
| "logits/rejected": -1.0292545557022095, |
| "logps/chosen": -2.681795120239258, |
| "logps/rejected": -3.1556873321533203, |
| "loss": 3.1155, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -26.817949295043945, |
| "rewards/margins": 4.738921642303467, |
| "rewards/rejected": -31.556873321533203, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.47497879558948264, |
| "grad_norm": 155.2109822525701, |
| "learning_rate": 5.048010015383021e-07, |
| "logits/chosen": -0.9646722674369812, |
| "logits/rejected": -0.956778883934021, |
| "logps/chosen": -2.597956657409668, |
| "logps/rejected": -3.2177212238311768, |
| "loss": 2.8626, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -25.979564666748047, |
| "rewards/margins": 6.197646617889404, |
| "rewards/rejected": -32.177215576171875, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.47724059937800395, |
| "grad_norm": 147.3834849484295, |
| "learning_rate": 5.01742993379502e-07, |
| "logits/chosen": -1.0122888088226318, |
| "logits/rejected": -1.0171258449554443, |
| "logps/chosen": -2.6626272201538086, |
| "logps/rejected": -3.146449089050293, |
| "loss": 3.0977, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -26.626270294189453, |
| "rewards/margins": 4.838218688964844, |
| "rewards/rejected": -31.464488983154297, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.4795024031665253, |
| "grad_norm": 159.19015701212336, |
| "learning_rate": 4.986786140245446e-07, |
| "logits/chosen": -0.9807606339454651, |
| "logits/rejected": -0.9683344960212708, |
| "logps/chosen": -2.5756638050079346, |
| "logps/rejected": -2.922246217727661, |
| "loss": 3.8934, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -25.756633758544922, |
| "rewards/margins": 3.4658255577087402, |
| "rewards/rejected": -29.222461700439453, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4817642069550466, |
| "grad_norm": 170.92750297621785, |
| "learning_rate": 4.956080553663687e-07, |
| "logits/chosen": -1.0279110670089722, |
| "logits/rejected": -1.0205495357513428, |
| "logps/chosen": -2.631155252456665, |
| "logps/rejected": -3.1095268726348877, |
| "loss": 3.4103, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -26.311552047729492, |
| "rewards/margins": 4.783715724945068, |
| "rewards/rejected": -31.09527015686035, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.484026010743568, |
| "grad_norm": 154.82473362445955, |
| "learning_rate": 4.925315096848636e-07, |
| "logits/chosen": -1.0190399885177612, |
| "logits/rejected": -1.014163613319397, |
| "logps/chosen": -2.682346820831299, |
| "logps/rejected": -3.2099146842956543, |
| "loss": 3.0444, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -26.823469161987305, |
| "rewards/margins": 5.2756781578063965, |
| "rewards/rejected": -32.099143981933594, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.48628781453208936, |
| "grad_norm": 138.22471006405996, |
| "learning_rate": 4.894491696348293e-07, |
| "logits/chosen": -1.0609517097473145, |
| "logits/rejected": -1.0692510604858398, |
| "logps/chosen": -2.570169448852539, |
| "logps/rejected": -2.9345059394836426, |
| "loss": 3.3457, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -25.701698303222656, |
| "rewards/margins": 3.6433632373809814, |
| "rewards/rejected": -29.345060348510742, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.48854961832061067, |
| "grad_norm": 132.4676990140604, |
| "learning_rate": 4.863612282339116e-07, |
| "logits/chosen": -0.968072235584259, |
| "logits/rejected": -0.9448983073234558, |
| "logps/chosen": -2.7336604595184326, |
| "logps/rejected": -3.2022647857666016, |
| "loss": 3.3316, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -27.336606979370117, |
| "rewards/margins": 4.6860432624816895, |
| "rewards/rejected": -32.02265167236328, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.49081142210913203, |
| "grad_norm": 130.33127389382147, |
| "learning_rate": 4.832678788505161e-07, |
| "logits/chosen": -0.9838683009147644, |
| "logits/rejected": -0.9567040801048279, |
| "logps/chosen": -2.637286901473999, |
| "logps/rejected": -3.1400976181030273, |
| "loss": 3.1326, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -26.37286949157715, |
| "rewards/margins": 5.028109550476074, |
| "rewards/rejected": -31.40097999572754, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4930732258976534, |
| "grad_norm": 200.20541042576428, |
| "learning_rate": 4.801693151916985e-07, |
| "logits/chosen": -1.0184892416000366, |
| "logits/rejected": -1.014449954032898, |
| "logps/chosen": -2.620845079421997, |
| "logps/rejected": -3.109973430633545, |
| "loss": 3.0453, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -26.20844841003418, |
| "rewards/margins": 4.891287326812744, |
| "rewards/rejected": -31.09973907470703, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4953350296861747, |
| "grad_norm": 124.9685928887746, |
| "learning_rate": 4.770657312910354e-07, |
| "logits/chosen": -1.0416370630264282, |
| "logits/rejected": -1.032057523727417, |
| "logps/chosen": -2.6119141578674316, |
| "logps/rejected": -3.0717544555664062, |
| "loss": 3.3908, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -26.119142532348633, |
| "rewards/margins": 4.5983991622924805, |
| "rewards/rejected": -30.71754264831543, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4975968334746961, |
| "grad_norm": 144.4680629722706, |
| "learning_rate": 4.739573214964729e-07, |
| "logits/chosen": -1.0071725845336914, |
| "logits/rejected": -0.979911208152771, |
| "logps/chosen": -2.5241236686706543, |
| "logps/rejected": -2.997404098510742, |
| "loss": 3.1178, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -25.241235733032227, |
| "rewards/margins": 4.732804298400879, |
| "rewards/rejected": -29.974040985107422, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.49985863726321744, |
| "grad_norm": 222.12373832600284, |
| "learning_rate": 4.7084428045815733e-07, |
| "logits/chosen": -0.9902421832084656, |
| "logits/rejected": -0.9855415225028992, |
| "logps/chosen": -2.6917271614074707, |
| "logps/rejected": -3.020761013031006, |
| "loss": 4.0458, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -26.91727066040039, |
| "rewards/margins": 3.290342330932617, |
| "rewards/rejected": -30.20760726928711, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5021204410517388, |
| "grad_norm": 184.680260766697, |
| "learning_rate": 4.677268031162457e-07, |
| "logits/chosen": -1.0093257427215576, |
| "logits/rejected": -0.9966680407524109, |
| "logps/chosen": -2.5475189685821533, |
| "logps/rejected": -3.01365065574646, |
| "loss": 3.4698, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -25.475189208984375, |
| "rewards/margins": 4.66131591796875, |
| "rewards/rejected": -30.136505126953125, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5043822448402601, |
| "grad_norm": 136.79024356574675, |
| "learning_rate": 4.646050846886985e-07, |
| "logits/chosen": -0.9515697360038757, |
| "logits/rejected": -0.9518415331840515, |
| "logps/chosen": -2.4492499828338623, |
| "logps/rejected": -2.9192066192626953, |
| "loss": 3.2096, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -24.49249839782715, |
| "rewards/margins": 4.699567794799805, |
| "rewards/rejected": -29.19206428527832, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5066440486287814, |
| "grad_norm": 151.5169744750405, |
| "learning_rate": 4.6147932065905494e-07, |
| "logits/chosen": -1.019814133644104, |
| "logits/rejected": -1.0050256252288818, |
| "logps/chosen": -2.612035036087036, |
| "logps/rejected": -2.9731242656707764, |
| "loss": 3.5691, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -26.12034797668457, |
| "rewards/margins": 3.6108956336975098, |
| "rewards/rejected": -29.731239318847656, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5089058524173028, |
| "grad_norm": 146.9307990920379, |
| "learning_rate": 4.5834970676419214e-07, |
| "logits/chosen": -0.9980362057685852, |
| "logits/rejected": -0.981331467628479, |
| "logps/chosen": -2.5562005043029785, |
| "logps/rejected": -2.9508323669433594, |
| "loss": 3.4484, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -25.562007904052734, |
| "rewards/margins": 3.946317434310913, |
| "rewards/rejected": -29.508325576782227, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5111676562058242, |
| "grad_norm": 178.66761742959343, |
| "learning_rate": 4.552164389820673e-07, |
| "logits/chosen": -0.9223219156265259, |
| "logits/rejected": -0.9146152138710022, |
| "logps/chosen": -2.5326623916625977, |
| "logps/rejected": -3.0222249031066895, |
| "loss": 3.4509, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -25.326622009277344, |
| "rewards/margins": 4.89562463760376, |
| "rewards/rejected": -30.22224998474121, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5134294599943455, |
| "grad_norm": 188.01343368426728, |
| "learning_rate": 4.5207971351944605e-07, |
| "logits/chosen": -1.0626205205917358, |
| "logits/rejected": -1.0522465705871582, |
| "logps/chosen": -2.579554796218872, |
| "logps/rejected": -2.987112045288086, |
| "loss": 3.6329, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -25.795547485351562, |
| "rewards/margins": 4.075572967529297, |
| "rewards/rejected": -29.87112045288086, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5156912637828668, |
| "grad_norm": 159.4963136122297, |
| "learning_rate": 4.489397267996157e-07, |
| "logits/chosen": -1.0269495248794556, |
| "logits/rejected": -1.007673740386963, |
| "logps/chosen": -2.5031819343566895, |
| "logps/rejected": -2.9473769664764404, |
| "loss": 3.34, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -25.03182029724121, |
| "rewards/margins": 4.441953182220459, |
| "rewards/rejected": -29.473773956298828, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5179530675713881, |
| "grad_norm": 150.8053085996754, |
| "learning_rate": 4.45796675450085e-07, |
| "logits/chosen": -1.01454758644104, |
| "logits/rejected": -1.0115008354187012, |
| "logps/chosen": -2.4989662170410156, |
| "logps/rejected": -2.9577999114990234, |
| "loss": 3.4289, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -24.989662170410156, |
| "rewards/margins": 4.5883378982543945, |
| "rewards/rejected": -29.578001022338867, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5202148713599095, |
| "grad_norm": 159.36648564262296, |
| "learning_rate": 4.4265075629027126e-07, |
| "logits/chosen": -0.974044919013977, |
| "logits/rejected": -0.9666758179664612, |
| "logps/chosen": -2.5280728340148926, |
| "logps/rejected": -2.9372358322143555, |
| "loss": 3.1007, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -25.280729293823242, |
| "rewards/margins": 4.0916314125061035, |
| "rewards/rejected": -29.372360229492188, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5224766751484309, |
| "grad_norm": 226.02995114665168, |
| "learning_rate": 4.3950216631917563e-07, |
| "logits/chosen": -1.0299785137176514, |
| "logits/rejected": -1.032341718673706, |
| "logps/chosen": -2.4965872764587402, |
| "logps/rejected": -3.022507429122925, |
| "loss": 2.8711, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -24.96587371826172, |
| "rewards/margins": 5.259200572967529, |
| "rewards/rejected": -30.225072860717773, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5247384789369522, |
| "grad_norm": 165.05960224221946, |
| "learning_rate": 4.3635110270304676e-07, |
| "logits/chosen": -1.042232871055603, |
| "logits/rejected": -1.0289117097854614, |
| "logps/chosen": -2.436948537826538, |
| "logps/rejected": -2.940929889678955, |
| "loss": 2.3945, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -24.36948585510254, |
| "rewards/margins": 5.039816856384277, |
| "rewards/rejected": -29.4093017578125, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5270002827254736, |
| "grad_norm": 159.51910763447063, |
| "learning_rate": 4.331977627630339e-07, |
| "logits/chosen": -0.9937188029289246, |
| "logits/rejected": -0.9700920581817627, |
| "logps/chosen": -2.4370651245117188, |
| "logps/rejected": -2.9327409267425537, |
| "loss": 2.9749, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -24.370651245117188, |
| "rewards/margins": 4.956755638122559, |
| "rewards/rejected": -29.327407836914062, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5292620865139949, |
| "grad_norm": 139.67122586454525, |
| "learning_rate": 4.300423439628313e-07, |
| "logits/chosen": -1.0289289951324463, |
| "logits/rejected": -1.027557134628296, |
| "logps/chosen": -2.4969000816345215, |
| "logps/rejected": -3.048306941986084, |
| "loss": 2.6142, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -24.969003677368164, |
| "rewards/margins": 5.514064311981201, |
| "rewards/rejected": -30.48306655883789, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5315238903025162, |
| "grad_norm": 134.66456207195904, |
| "learning_rate": 4.268850438963118e-07, |
| "logits/chosen": -1.0312316417694092, |
| "logits/rejected": -1.038203239440918, |
| "logps/chosen": -2.631141424179077, |
| "logps/rejected": -3.1112723350524902, |
| "loss": 2.9671, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -26.311412811279297, |
| "rewards/margins": 4.801308631896973, |
| "rewards/rejected": -31.112722396850586, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5337856940910376, |
| "grad_norm": 162.1792467046527, |
| "learning_rate": 4.2372606027515463e-07, |
| "logits/chosen": -1.0173921585083008, |
| "logits/rejected": -1.0191435813903809, |
| "logps/chosen": -2.651998519897461, |
| "logps/rejected": -3.0730855464935303, |
| "loss": 3.3264, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -26.519981384277344, |
| "rewards/margins": 4.210873603820801, |
| "rewards/rejected": -30.73085594177246, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.536047497879559, |
| "grad_norm": 175.2406482243298, |
| "learning_rate": 4.2056559091646387e-07, |
| "logits/chosen": -1.0354706048965454, |
| "logits/rejected": -1.0305323600769043, |
| "logps/chosen": -2.6608736515045166, |
| "logps/rejected": -3.0917770862579346, |
| "loss": 3.4704, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -26.608734130859375, |
| "rewards/margins": 4.309037208557129, |
| "rewards/rejected": -30.917770385742188, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5383093016680803, |
| "grad_norm": 146.3851634765387, |
| "learning_rate": 4.1740383373038116e-07, |
| "logits/chosen": -1.0109094381332397, |
| "logits/rejected": -1.0172268152236938, |
| "logps/chosen": -2.564114809036255, |
| "logps/rejected": -2.9928038120269775, |
| "loss": 3.4799, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -25.641149520874023, |
| "rewards/margins": 4.28688907623291, |
| "rewards/rejected": -29.928035736083984, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5405711054566016, |
| "grad_norm": 169.57296943959173, |
| "learning_rate": 4.1424098670769255e-07, |
| "logits/chosen": -1.0650702714920044, |
| "logits/rejected": -1.0601589679718018, |
| "logps/chosen": -2.695429563522339, |
| "logps/rejected": -3.104177713394165, |
| "loss": 3.3077, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -26.954296112060547, |
| "rewards/margins": 4.0874810218811035, |
| "rewards/rejected": -31.041776657104492, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.542832909245123, |
| "grad_norm": 119.6967428650925, |
| "learning_rate": 4.1107724790743007e-07, |
| "logits/chosen": -0.9832959175109863, |
| "logits/rejected": -1.000196933746338, |
| "logps/chosen": -2.490891933441162, |
| "logps/rejected": -2.9166793823242188, |
| "loss": 2.9422, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -24.908920288085938, |
| "rewards/margins": 4.257873058319092, |
| "rewards/rejected": -29.166791915893555, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5450947130336443, |
| "grad_norm": 163.28408702057416, |
| "learning_rate": 4.0791281544446947e-07, |
| "logits/chosen": -1.053307294845581, |
| "logits/rejected": -1.0447359085083008, |
| "logps/chosen": -2.5901968479156494, |
| "logps/rejected": -3.1284067630767822, |
| "loss": 2.6637, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -25.901966094970703, |
| "rewards/margins": 5.382099628448486, |
| "rewards/rejected": -31.28406524658203, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5473565168221657, |
| "grad_norm": 148.41508302533668, |
| "learning_rate": 4.0474788747712416e-07, |
| "logits/chosen": -1.057266116142273, |
| "logits/rejected": -1.047303318977356, |
| "logps/chosen": -2.5654890537261963, |
| "logps/rejected": -2.987715244293213, |
| "loss": 3.2988, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -25.654890060424805, |
| "rewards/margins": 4.222264289855957, |
| "rewards/rejected": -29.877155303955078, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.549618320610687, |
| "grad_norm": 126.18312951161131, |
| "learning_rate": 4.0158266219473573e-07, |
| "logits/chosen": -1.0503863096237183, |
| "logits/rejected": -1.0432727336883545, |
| "logps/chosen": -2.4267337322235107, |
| "logps/rejected": -2.9202306270599365, |
| "loss": 2.9689, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -24.267337799072266, |
| "rewards/margins": 4.9349684715271, |
| "rewards/rejected": -29.202306747436523, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5518801243992084, |
| "grad_norm": 133.87077060972973, |
| "learning_rate": 3.984173378052643e-07, |
| "logits/chosen": -1.0356292724609375, |
| "logits/rejected": -1.0147960186004639, |
| "logps/chosen": -2.316190719604492, |
| "logps/rejected": -2.8582763671875, |
| "loss": 2.48, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -23.161909103393555, |
| "rewards/margins": 5.420856475830078, |
| "rewards/rejected": -28.582763671875, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5541419281877297, |
| "grad_norm": 142.54981649690544, |
| "learning_rate": 3.9525211252287585e-07, |
| "logits/chosen": -1.0919835567474365, |
| "logits/rejected": -1.0901732444763184, |
| "logps/chosen": -2.596813201904297, |
| "logps/rejected": -3.177374839782715, |
| "loss": 2.8792, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -25.968130111694336, |
| "rewards/margins": 5.805619716644287, |
| "rewards/rejected": -31.77375030517578, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.556403731976251, |
| "grad_norm": 128.27060507121888, |
| "learning_rate": 3.920871845555305e-07, |
| "logits/chosen": -1.0442081689834595, |
| "logits/rejected": -1.0269874334335327, |
| "logps/chosen": -2.5303421020507812, |
| "logps/rejected": -2.9623892307281494, |
| "loss": 2.9162, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -25.303421020507812, |
| "rewards/margins": 4.320469856262207, |
| "rewards/rejected": -29.623889923095703, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5586655357647724, |
| "grad_norm": 153.31496605164523, |
| "learning_rate": 3.8892275209256984e-07, |
| "logits/chosen": -1.0571988821029663, |
| "logits/rejected": -1.0373103618621826, |
| "logps/chosen": -2.6785757541656494, |
| "logps/rejected": -3.0567195415496826, |
| "loss": 3.0226, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -26.78575897216797, |
| "rewards/margins": 3.7814342975616455, |
| "rewards/rejected": -30.567195892333984, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5609273395532938, |
| "grad_norm": 171.8190367681467, |
| "learning_rate": 3.8575901329230747e-07, |
| "logits/chosen": -1.0109193325042725, |
| "logits/rejected": -0.9870609641075134, |
| "logps/chosen": -2.7195398807525635, |
| "logps/rejected": -3.1331217288970947, |
| "loss": 3.6614, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -27.19540023803711, |
| "rewards/margins": 4.135817527770996, |
| "rewards/rejected": -31.331218719482422, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5631891433418151, |
| "grad_norm": 140.4062037075952, |
| "learning_rate": 3.8259616626961886e-07, |
| "logits/chosen": -0.9998199939727783, |
| "logits/rejected": -1.00416898727417, |
| "logps/chosen": -2.5169920921325684, |
| "logps/rejected": -2.868487596511841, |
| "loss": 3.1507, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -25.169921875, |
| "rewards/margins": 3.5149550437927246, |
| "rewards/rejected": -28.68487548828125, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5654509471303364, |
| "grad_norm": 166.8389194279534, |
| "learning_rate": 3.794344090835362e-07, |
| "logits/chosen": -1.0278959274291992, |
| "logits/rejected": -1.0074037313461304, |
| "logps/chosen": -2.7008965015411377, |
| "logps/rejected": -3.127589225769043, |
| "loss": 3.4144, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -27.00896453857422, |
| "rewards/margins": 4.26693058013916, |
| "rewards/rejected": -31.275894165039062, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5677127509188578, |
| "grad_norm": 143.63770434185832, |
| "learning_rate": 3.7627393972484534e-07, |
| "logits/chosen": -1.1122283935546875, |
| "logits/rejected": -1.0987051725387573, |
| "logps/chosen": -2.713491439819336, |
| "logps/rejected": -3.113678216934204, |
| "loss": 3.2252, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -27.134912490844727, |
| "rewards/margins": 4.001870155334473, |
| "rewards/rejected": -31.136781692504883, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5699745547073791, |
| "grad_norm": 136.26224349022138, |
| "learning_rate": 3.7311495610368823e-07, |
| "logits/chosen": -1.0734153985977173, |
| "logits/rejected": -1.0677202939987183, |
| "logps/chosen": -2.799349784851074, |
| "logps/rejected": -3.266136646270752, |
| "loss": 3.0055, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -27.99349594116211, |
| "rewards/margins": 4.667870998382568, |
| "rewards/rejected": -32.6613655090332, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5722363584959005, |
| "grad_norm": 156.68909191083887, |
| "learning_rate": 3.699576560371689e-07, |
| "logits/chosen": -1.0107449293136597, |
| "logits/rejected": -1.012537956237793, |
| "logps/chosen": -2.8697638511657715, |
| "logps/rejected": -3.5351531505584717, |
| "loss": 2.4639, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -28.69763946533203, |
| "rewards/margins": 6.6538920402526855, |
| "rewards/rejected": -35.351531982421875, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5744981622844219, |
| "grad_norm": 150.83056174799268, |
| "learning_rate": 3.66802237236966e-07, |
| "logits/chosen": -1.0131436586380005, |
| "logits/rejected": -1.0157915353775024, |
| "logps/chosen": -2.8984572887420654, |
| "logps/rejected": -3.4090542793273926, |
| "loss": 3.0289, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -28.984573364257812, |
| "rewards/margins": 5.105963706970215, |
| "rewards/rejected": -34.090538024902344, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5767599660729432, |
| "grad_norm": 167.25008334154396, |
| "learning_rate": 3.636488972969532e-07, |
| "logits/chosen": -1.0094777345657349, |
| "logits/rejected": -1.0090457201004028, |
| "logps/chosen": -2.8125481605529785, |
| "logps/rejected": -3.2535414695739746, |
| "loss": 3.3245, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -28.1254825592041, |
| "rewards/margins": 4.409930229187012, |
| "rewards/rejected": -32.53541564941406, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5790217698614645, |
| "grad_norm": 141.29219706418925, |
| "learning_rate": 3.604978336808244e-07, |
| "logits/chosen": -1.140702724456787, |
| "logits/rejected": -1.1400625705718994, |
| "logps/chosen": -2.848879337310791, |
| "logps/rejected": -3.357835292816162, |
| "loss": 2.8014, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -28.48879051208496, |
| "rewards/margins": 5.089555740356445, |
| "rewards/rejected": -33.57835006713867, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5812835736499858, |
| "grad_norm": 151.14436444702108, |
| "learning_rate": 3.5734924370972876e-07, |
| "logits/chosen": -1.0457602739334106, |
| "logits/rejected": -1.0472681522369385, |
| "logps/chosen": -2.700531244277954, |
| "logps/rejected": -3.1960270404815674, |
| "loss": 2.7282, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -27.005313873291016, |
| "rewards/margins": 4.954959392547607, |
| "rewards/rejected": -31.96027183532715, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5835453774385072, |
| "grad_norm": 169.2294504356089, |
| "learning_rate": 3.5420332454991504e-07, |
| "logits/chosen": -1.0189040899276733, |
| "logits/rejected": -1.0160531997680664, |
| "logps/chosen": -2.818633556365967, |
| "logps/rejected": -3.2412967681884766, |
| "loss": 3.4916, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -28.186336517333984, |
| "rewards/margins": 4.226632118225098, |
| "rewards/rejected": -32.412967681884766, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5858071812270286, |
| "grad_norm": 168.96562111413766, |
| "learning_rate": 3.510602732003843e-07, |
| "logits/chosen": -1.0517528057098389, |
| "logits/rejected": -1.0637171268463135, |
| "logps/chosen": -3.009273052215576, |
| "logps/rejected": -3.4520859718322754, |
| "loss": 3.2834, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -30.092731475830078, |
| "rewards/margins": 4.428128719329834, |
| "rewards/rejected": -34.52085876464844, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5880689850155499, |
| "grad_norm": 154.95870708399784, |
| "learning_rate": 3.4792028648055396e-07, |
| "logits/chosen": -1.0257949829101562, |
| "logits/rejected": -1.0418014526367188, |
| "logps/chosen": -2.8583335876464844, |
| "logps/rejected": -3.3227787017822266, |
| "loss": 2.9362, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -28.583337783813477, |
| "rewards/margins": 4.644450664520264, |
| "rewards/rejected": -33.227787017822266, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5903307888040712, |
| "grad_norm": 152.99013472578545, |
| "learning_rate": 3.447835610179327e-07, |
| "logits/chosen": -1.0327140092849731, |
| "logits/rejected": -1.0097893476486206, |
| "logps/chosen": -2.8091721534729004, |
| "logps/rejected": -3.4519004821777344, |
| "loss": 2.827, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -28.091720581054688, |
| "rewards/margins": 6.427282333374023, |
| "rewards/rejected": -34.519004821777344, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 166.47320424747275, |
| "learning_rate": 3.416502932358079e-07, |
| "logits/chosen": -1.0839364528656006, |
| "logits/rejected": -1.0797481536865234, |
| "logps/chosen": -2.8693737983703613, |
| "logps/rejected": -3.1928672790527344, |
| "loss": 3.5401, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -28.69373893737793, |
| "rewards/margins": 3.2349326610565186, |
| "rewards/rejected": -31.92867088317871, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5948543963811139, |
| "grad_norm": 175.40553313980547, |
| "learning_rate": 3.385206793409451e-07, |
| "logits/chosen": -1.0378577709197998, |
| "logits/rejected": -1.0370153188705444, |
| "logps/chosen": -2.7698192596435547, |
| "logps/rejected": -3.1720666885375977, |
| "loss": 3.3164, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -27.698190689086914, |
| "rewards/margins": 4.0224761962890625, |
| "rewards/rejected": -31.72066879272461, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5971162001696353, |
| "grad_norm": 155.3543000873015, |
| "learning_rate": 3.3539491531130163e-07, |
| "logits/chosen": -1.0410065650939941, |
| "logits/rejected": -1.0408883094787598, |
| "logps/chosen": -2.8821988105773926, |
| "logps/rejected": -3.3631529808044434, |
| "loss": 3.193, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -28.82198715209961, |
| "rewards/margins": 4.809541702270508, |
| "rewards/rejected": -33.631526947021484, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5993780039581567, |
| "grad_norm": 149.20108555092546, |
| "learning_rate": 3.3227319688375426e-07, |
| "logits/chosen": -1.1061354875564575, |
| "logits/rejected": -1.0955908298492432, |
| "logps/chosen": -2.884066581726074, |
| "logps/rejected": -3.2610855102539062, |
| "loss": 3.2773, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -28.840667724609375, |
| "rewards/margins": 3.770188808441162, |
| "rewards/rejected": -32.61085891723633, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.601639807746678, |
| "grad_norm": 155.70578005343887, |
| "learning_rate": 3.291557195418427e-07, |
| "logits/chosen": -1.0915729999542236, |
| "logits/rejected": -1.0768842697143555, |
| "logps/chosen": -2.6985390186309814, |
| "logps/rejected": -3.0585601329803467, |
| "loss": 3.4593, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -26.985389709472656, |
| "rewards/margins": 3.600210666656494, |
| "rewards/rejected": -30.58559799194336, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6039016115351993, |
| "grad_norm": 219.43494306982168, |
| "learning_rate": 3.260426785035272e-07, |
| "logits/chosen": -1.11444890499115, |
| "logits/rejected": -1.1026936769485474, |
| "logps/chosen": -2.854384422302246, |
| "logps/rejected": -3.2280983924865723, |
| "loss": 3.7163, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -28.543846130371094, |
| "rewards/margins": 3.7371411323547363, |
| "rewards/rejected": -32.28098678588867, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.6061634153237206, |
| "grad_norm": 171.107251475596, |
| "learning_rate": 3.229342687089646e-07, |
| "logits/chosen": -1.070772647857666, |
| "logits/rejected": -1.0768314599990845, |
| "logps/chosen": -2.7497973442077637, |
| "logps/rejected": -3.2370479106903076, |
| "loss": 3.0025, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -27.497974395751953, |
| "rewards/margins": 4.8725080490112305, |
| "rewards/rejected": -32.370479583740234, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.608425219112242, |
| "grad_norm": 171.5051536968982, |
| "learning_rate": 3.1983068480830143e-07, |
| "logits/chosen": -1.0967150926589966, |
| "logits/rejected": -1.0990163087844849, |
| "logps/chosen": -2.86303973197937, |
| "logps/rejected": -3.367943048477173, |
| "loss": 3.1547, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -28.63039779663086, |
| "rewards/margins": 5.04902982711792, |
| "rewards/rejected": -33.67942810058594, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6106870229007634, |
| "grad_norm": 151.3759521009327, |
| "learning_rate": 3.1673212114948387e-07, |
| "logits/chosen": -1.0704870223999023, |
| "logits/rejected": -1.0655865669250488, |
| "logps/chosen": -2.7373344898223877, |
| "logps/rejected": -3.2650370597839355, |
| "loss": 2.7578, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -27.37334632873535, |
| "rewards/margins": 5.277024269104004, |
| "rewards/rejected": -32.650367736816406, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6129488266892847, |
| "grad_norm": 183.96062606737704, |
| "learning_rate": 3.1363877176608845e-07, |
| "logits/chosen": -1.0521974563598633, |
| "logits/rejected": -1.0564298629760742, |
| "logps/chosen": -2.6424999237060547, |
| "logps/rejected": -3.1260673999786377, |
| "loss": 3.0961, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -26.42500114440918, |
| "rewards/margins": 4.835672378540039, |
| "rewards/rejected": -31.26067352294922, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.615210630477806, |
| "grad_norm": 146.7415619801789, |
| "learning_rate": 3.1055083036517076e-07, |
| "logits/chosen": -1.0500925779342651, |
| "logits/rejected": -1.0320236682891846, |
| "logps/chosen": -2.6332831382751465, |
| "logps/rejected": -3.151555299758911, |
| "loss": 2.8809, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -26.33283233642578, |
| "rewards/margins": 5.182720184326172, |
| "rewards/rejected": -31.51555061340332, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6174724342663274, |
| "grad_norm": 176.8059829778894, |
| "learning_rate": 3.074684903151364e-07, |
| "logits/chosen": -0.9690829515457153, |
| "logits/rejected": -0.9445469379425049, |
| "logps/chosen": -2.4842689037323, |
| "logps/rejected": -2.8961751461029053, |
| "loss": 2.9286, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -24.842689514160156, |
| "rewards/margins": 4.1190619468688965, |
| "rewards/rejected": -28.96175193786621, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6197342380548487, |
| "grad_norm": 174.11563926665636, |
| "learning_rate": 3.0439194463363136e-07, |
| "logits/chosen": -1.03439199924469, |
| "logits/rejected": -1.011659860610962, |
| "logps/chosen": -2.580949306488037, |
| "logps/rejected": -2.947218179702759, |
| "loss": 3.3772, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -25.809492111206055, |
| "rewards/margins": 3.6626861095428467, |
| "rewards/rejected": -29.47217559814453, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.6219960418433701, |
| "grad_norm": 120.16739284187575, |
| "learning_rate": 3.0132138597545537e-07, |
| "logits/chosen": -1.0700979232788086, |
| "logits/rejected": -1.0875582695007324, |
| "logps/chosen": -2.760199546813965, |
| "logps/rejected": -3.3094594478607178, |
| "loss": 2.8477, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -27.601999282836914, |
| "rewards/margins": 5.4925971031188965, |
| "rewards/rejected": -33.0945930480957, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6242578456318915, |
| "grad_norm": 133.7458346644232, |
| "learning_rate": 2.982570066204981e-07, |
| "logits/chosen": -1.0688081979751587, |
| "logits/rejected": -1.0659135580062866, |
| "logps/chosen": -2.709754705429077, |
| "logps/rejected": -3.212181329727173, |
| "loss": 2.8696, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -27.097549438476562, |
| "rewards/margins": 5.024271011352539, |
| "rewards/rejected": -32.12181854248047, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.6265196494204128, |
| "grad_norm": 194.94745882680402, |
| "learning_rate": 2.951989984616979e-07, |
| "logits/chosen": -1.0103790760040283, |
| "logits/rejected": -1.0189611911773682, |
| "logps/chosen": -2.788445472717285, |
| "logps/rejected": -3.306720018386841, |
| "loss": 3.2288, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -27.88445472717285, |
| "rewards/margins": 5.182745933532715, |
| "rewards/rejected": -33.06719970703125, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6287814532089341, |
| "grad_norm": 181.74990018566842, |
| "learning_rate": 2.9214755299302584e-07, |
| "logits/chosen": -0.9969690442085266, |
| "logits/rejected": -0.993459939956665, |
| "logps/chosen": -2.8557004928588867, |
| "logps/rejected": -3.3921194076538086, |
| "loss": 2.7074, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -28.5570068359375, |
| "rewards/margins": 5.3641886711120605, |
| "rewards/rejected": -33.92119598388672, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6310432569974554, |
| "grad_norm": 149.61408474184353, |
| "learning_rate": 2.89102861297494e-07, |
| "logits/chosen": -1.0359619855880737, |
| "logits/rejected": -1.0459537506103516, |
| "logps/chosen": -2.7723605632781982, |
| "logps/rejected": -3.2539467811584473, |
| "loss": 3.182, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -27.72360610961914, |
| "rewards/margins": 4.815859794616699, |
| "rewards/rejected": -32.539466857910156, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.6333050607859768, |
| "grad_norm": 195.39013018559334, |
| "learning_rate": 2.860651140351902e-07, |
| "logits/chosen": -1.037549614906311, |
| "logits/rejected": -1.0350432395935059, |
| "logps/chosen": -2.8769359588623047, |
| "logps/rejected": -3.420259475708008, |
| "loss": 2.9818, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -28.769359588623047, |
| "rewards/margins": 5.433237552642822, |
| "rewards/rejected": -34.20259475708008, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6355668645744982, |
| "grad_norm": 155.92948546147593, |
| "learning_rate": 2.830345014313381e-07, |
| "logits/chosen": -0.9779025912284851, |
| "logits/rejected": -0.9906786680221558, |
| "logps/chosen": -2.9215312004089355, |
| "logps/rejected": -3.4977033138275146, |
| "loss": 2.6284, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -29.215312957763672, |
| "rewards/margins": 5.761720180511475, |
| "rewards/rejected": -34.97703552246094, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6378286683630195, |
| "grad_norm": 187.88083261637343, |
| "learning_rate": 2.800112132643856e-07, |
| "logits/chosen": -1.0564908981323242, |
| "logits/rejected": -1.0514881610870361, |
| "logps/chosen": -2.9547762870788574, |
| "logps/rejected": -3.5844526290893555, |
| "loss": 2.8202, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -29.547761917114258, |
| "rewards/margins": 6.2967658042907715, |
| "rewards/rejected": -35.84452819824219, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6400904721515408, |
| "grad_norm": 156.47358710093474, |
| "learning_rate": 2.7699543885412105e-07, |
| "logits/chosen": -1.0226179361343384, |
| "logits/rejected": -1.0112264156341553, |
| "logps/chosen": -2.949159622192383, |
| "logps/rejected": -3.5769615173339844, |
| "loss": 2.5308, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -29.491596221923828, |
| "rewards/margins": 6.278021812438965, |
| "rewards/rejected": -35.76961898803711, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6423522759400622, |
| "grad_norm": 153.99253553149956, |
| "learning_rate": 2.7398736704981725e-07, |
| "logits/chosen": -1.034913182258606, |
| "logits/rejected": -1.0078083276748657, |
| "logps/chosen": -3.0782995223999023, |
| "logps/rejected": -3.6696910858154297, |
| "loss": 2.6226, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -30.78299331665039, |
| "rewards/margins": 5.913917541503906, |
| "rewards/rejected": -36.6969108581543, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6446140797285835, |
| "grad_norm": 222.1159065771539, |
| "learning_rate": 2.709871862184063e-07, |
| "logits/chosen": -1.0211516618728638, |
| "logits/rejected": -1.0156588554382324, |
| "logps/chosen": -2.9046590328216553, |
| "logps/rejected": -3.4058563709259033, |
| "loss": 3.3323, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -29.046588897705078, |
| "rewards/margins": 5.011976718902588, |
| "rewards/rejected": -34.05856704711914, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6468758835171049, |
| "grad_norm": 156.58411783215192, |
| "learning_rate": 2.679950842326837e-07, |
| "logits/chosen": -1.055216908454895, |
| "logits/rejected": -1.0356172323226929, |
| "logps/chosen": -2.966404676437378, |
| "logps/rejected": -3.566565990447998, |
| "loss": 2.673, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -29.664047241210938, |
| "rewards/margins": 6.001612186431885, |
| "rewards/rejected": -35.66565704345703, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6491376873056263, |
| "grad_norm": 131.25191153891365, |
| "learning_rate": 2.6501124845954363e-07, |
| "logits/chosen": -1.0120482444763184, |
| "logits/rejected": -1.0116891860961914, |
| "logps/chosen": -2.8681583404541016, |
| "logps/rejected": -3.502807140350342, |
| "loss": 2.3787, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -28.68158531188965, |
| "rewards/margins": 6.346485137939453, |
| "rewards/rejected": -35.02806854248047, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6513994910941476, |
| "grad_norm": 182.00067587268094, |
| "learning_rate": 2.62035865748246e-07, |
| "logits/chosen": -0.9981238842010498, |
| "logits/rejected": -0.9938050508499146, |
| "logps/chosen": -2.7982468605041504, |
| "logps/rejected": -3.3067078590393066, |
| "loss": 2.9436, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -27.982467651367188, |
| "rewards/margins": 5.084610462188721, |
| "rewards/rejected": -33.06707763671875, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6536612948826689, |
| "grad_norm": 158.2959824838079, |
| "learning_rate": 2.5906912241871554e-07, |
| "logits/chosen": -1.0856727361679077, |
| "logits/rejected": -1.0612850189208984, |
| "logps/chosen": -2.90170955657959, |
| "logps/rejected": -3.4285690784454346, |
| "loss": 2.8793, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -29.0170955657959, |
| "rewards/margins": 5.268592357635498, |
| "rewards/rejected": -34.28569030761719, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6559230986711903, |
| "grad_norm": 187.9726365909188, |
| "learning_rate": 2.561112042498753e-07, |
| "logits/chosen": -0.992550253868103, |
| "logits/rejected": -0.9997091293334961, |
| "logps/chosen": -2.7526485919952393, |
| "logps/rejected": -3.165712594985962, |
| "loss": 3.721, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -27.526485443115234, |
| "rewards/margins": 4.130640029907227, |
| "rewards/rejected": -31.65712547302246, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6581849024597116, |
| "grad_norm": 143.79444839334136, |
| "learning_rate": 2.5316229646801195e-07, |
| "logits/chosen": -0.9941728115081787, |
| "logits/rejected": -0.992635190486908, |
| "logps/chosen": -2.9787821769714355, |
| "logps/rejected": -3.4489240646362305, |
| "loss": 2.7863, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -29.787822723388672, |
| "rewards/margins": 4.701417922973633, |
| "rewards/rejected": -34.48924255371094, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.660446706248233, |
| "grad_norm": 153.0517998301082, |
| "learning_rate": 2.5022258373517714e-07, |
| "logits/chosen": -1.104297161102295, |
| "logits/rejected": -1.0981920957565308, |
| "logps/chosen": -2.7750794887542725, |
| "logps/rejected": -3.223928451538086, |
| "loss": 2.9225, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -27.75079345703125, |
| "rewards/margins": 4.488492012023926, |
| "rewards/rejected": -32.23928451538086, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6627085100367544, |
| "grad_norm": 153.94026088368992, |
| "learning_rate": 2.4729225013762474e-07, |
| "logits/chosen": -1.143121361732483, |
| "logits/rejected": -1.142082929611206, |
| "logps/chosen": -3.0029547214508057, |
| "logps/rejected": -3.4995949268341064, |
| "loss": 3.0848, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -30.0295467376709, |
| "rewards/margins": 4.966399669647217, |
| "rewards/rejected": -34.995948791503906, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.6649703138252756, |
| "grad_norm": 181.36537578535228, |
| "learning_rate": 2.4437147917428203e-07, |
| "logits/chosen": -1.033583641052246, |
| "logits/rejected": -1.026897668838501, |
| "logps/chosen": -2.8641695976257324, |
| "logps/rejected": -3.3429033756256104, |
| "loss": 3.1708, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -28.641693115234375, |
| "rewards/margins": 4.787341117858887, |
| "rewards/rejected": -33.429039001464844, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.667232117613797, |
| "grad_norm": 232.87767557454146, |
| "learning_rate": 2.414604537452595e-07, |
| "logits/chosen": -1.0605061054229736, |
| "logits/rejected": -1.0549243688583374, |
| "logps/chosen": -2.7509469985961914, |
| "logps/rejected": -3.1501309871673584, |
| "loss": 3.3439, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -27.509469985961914, |
| "rewards/margins": 3.9918391704559326, |
| "rewards/rejected": -31.50130844116211, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6694939214023183, |
| "grad_norm": 140.3168861772763, |
| "learning_rate": 2.385593561403974e-07, |
| "logits/chosen": -1.072545051574707, |
| "logits/rejected": -1.0590806007385254, |
| "logps/chosen": -2.5582523345947266, |
| "logps/rejected": -3.033470392227173, |
| "loss": 3.001, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -25.58251953125, |
| "rewards/margins": 4.752185821533203, |
| "rewards/rejected": -30.33470916748047, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6717557251908397, |
| "grad_norm": 130.90402883012493, |
| "learning_rate": 2.3566836802785119e-07, |
| "logits/chosen": -1.0802139043807983, |
| "logits/rejected": -1.0805728435516357, |
| "logps/chosen": -2.5614163875579834, |
| "logps/rejected": -3.126112461090088, |
| "loss": 2.3384, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -25.614166259765625, |
| "rewards/margins": 5.6469621658325195, |
| "rewards/rejected": -31.261127471923828, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6740175289793611, |
| "grad_norm": 167.16655731623354, |
| "learning_rate": 2.327876704427146e-07, |
| "logits/chosen": -1.0259709358215332, |
| "logits/rejected": -1.0346571207046509, |
| "logps/chosen": -2.592196226119995, |
| "logps/rejected": -2.979300022125244, |
| "loss": 3.2647, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -25.92196273803711, |
| "rewards/margins": 3.871039628982544, |
| "rewards/rejected": -29.79300308227539, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6762793327678824, |
| "grad_norm": 260.03040862255443, |
| "learning_rate": 2.2991744377568358e-07, |
| "logits/chosen": -1.0623974800109863, |
| "logits/rejected": -1.0467642545700073, |
| "logps/chosen": -2.873406410217285, |
| "logps/rejected": -3.258018970489502, |
| "loss": 3.5362, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -28.73406410217285, |
| "rewards/margins": 3.8461239337921143, |
| "rewards/rejected": -32.5801887512207, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6785411365564037, |
| "grad_norm": 165.28649873322973, |
| "learning_rate": 2.270578677617601e-07, |
| "logits/chosen": -1.0998969078063965, |
| "logits/rejected": -1.0864473581314087, |
| "logps/chosen": -2.6970479488372803, |
| "logps/rejected": -3.2025129795074463, |
| "loss": 3.4805, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -26.970483779907227, |
| "rewards/margins": 5.054651260375977, |
| "rewards/rejected": -32.0251350402832, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6808029403449251, |
| "grad_norm": 147.02104564491643, |
| "learning_rate": 2.242091214689971e-07, |
| "logits/chosen": -1.080596923828125, |
| "logits/rejected": -1.0811580419540405, |
| "logps/chosen": -2.7154738903045654, |
| "logps/rejected": -3.343048095703125, |
| "loss": 2.413, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -27.154741287231445, |
| "rewards/margins": 6.27573823928833, |
| "rewards/rejected": -33.430477142333984, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.6830647441334464, |
| "grad_norm": 168.1353554238078, |
| "learning_rate": 2.2137138328728456e-07, |
| "logits/chosen": -1.1214321851730347, |
| "logits/rejected": -1.1100562810897827, |
| "logps/chosen": -2.8991968631744385, |
| "logps/rejected": -3.303884983062744, |
| "loss": 3.0082, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -28.99197006225586, |
| "rewards/margins": 4.046879768371582, |
| "rewards/rejected": -33.038848876953125, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6853265479219678, |
| "grad_norm": 138.49574783119974, |
| "learning_rate": 2.1854483091717974e-07, |
| "logits/chosen": -1.1026039123535156, |
| "logits/rejected": -1.1136388778686523, |
| "logps/chosen": -2.7164382934570312, |
| "logps/rejected": -3.249185085296631, |
| "loss": 2.5052, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -27.16438102722168, |
| "rewards/margins": 5.32747220993042, |
| "rewards/rejected": -32.491851806640625, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6875883517104892, |
| "grad_norm": 154.94371001690558, |
| "learning_rate": 2.1572964135877863e-07, |
| "logits/chosen": -1.093347191810608, |
| "logits/rejected": -1.0927071571350098, |
| "logps/chosen": -2.7913358211517334, |
| "logps/rejected": -3.1904070377349854, |
| "loss": 3.5234, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -27.913358688354492, |
| "rewards/margins": 3.9907102584838867, |
| "rewards/rejected": -31.904071807861328, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6898501554990104, |
| "grad_norm": 150.4889483956078, |
| "learning_rate": 2.1292599090063245e-07, |
| "logits/chosen": -1.1195977926254272, |
| "logits/rejected": -1.1199798583984375, |
| "logps/chosen": -2.6830005645751953, |
| "logps/rejected": -3.2673914432525635, |
| "loss": 2.702, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -26.830005645751953, |
| "rewards/margins": 5.843911170959473, |
| "rewards/rejected": -32.67391586303711, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6921119592875318, |
| "grad_norm": 138.37763662950627, |
| "learning_rate": 2.1013405510870824e-07, |
| "logits/chosen": -1.0394456386566162, |
| "logits/rejected": -1.0511260032653809, |
| "logps/chosen": -2.730743885040283, |
| "logps/rejected": -3.277578115463257, |
| "loss": 2.974, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -27.30743980407715, |
| "rewards/margins": 5.4683427810668945, |
| "rewards/rejected": -32.775779724121094, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6943737630760531, |
| "grad_norm": 148.04801804973624, |
| "learning_rate": 2.0735400881539494e-07, |
| "logits/chosen": -1.0334455966949463, |
| "logits/rejected": -1.0548396110534668, |
| "logps/chosen": -2.8730499744415283, |
| "logps/rejected": -3.461742877960205, |
| "loss": 2.6681, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -28.730499267578125, |
| "rewards/margins": 5.886929988861084, |
| "rewards/rejected": -34.617431640625, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6966355668645745, |
| "grad_norm": 221.53992304852852, |
| "learning_rate": 2.0458602610855536e-07, |
| "logits/chosen": -1.1213308572769165, |
| "logits/rejected": -1.1119002103805542, |
| "logps/chosen": -2.9042418003082275, |
| "logps/rejected": -3.3705925941467285, |
| "loss": 2.7571, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -29.042417526245117, |
| "rewards/margins": 4.663509368896484, |
| "rewards/rejected": -33.705928802490234, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6988973706530959, |
| "grad_norm": 136.07823495543695, |
| "learning_rate": 2.0183028032062422e-07, |
| "logits/chosen": -1.0783358812332153, |
| "logits/rejected": -1.0803169012069702, |
| "logps/chosen": -2.797858238220215, |
| "logps/rejected": -3.263309955596924, |
| "loss": 3.0103, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -27.97858238220215, |
| "rewards/margins": 4.654518127441406, |
| "rewards/rejected": -32.63310623168945, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7011591744416172, |
| "grad_norm": 155.65801501138648, |
| "learning_rate": 1.9908694401775473e-07, |
| "logits/chosen": -1.1042208671569824, |
| "logits/rejected": -1.1049745082855225, |
| "logps/chosen": -2.884843349456787, |
| "logps/rejected": -3.377685785293579, |
| "loss": 2.9679, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -28.848434448242188, |
| "rewards/margins": 4.92842435836792, |
| "rewards/rejected": -33.776859283447266, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7034209782301385, |
| "grad_norm": 139.09066002924212, |
| "learning_rate": 1.9635618898901196e-07, |
| "logits/chosen": -1.0947176218032837, |
| "logits/rejected": -1.091169834136963, |
| "logps/chosen": -3.14107608795166, |
| "logps/rejected": -3.6818785667419434, |
| "loss": 3.0449, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -31.410762786865234, |
| "rewards/margins": 5.408024311065674, |
| "rewards/rejected": -36.81878662109375, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7056827820186599, |
| "grad_norm": 162.5311793566366, |
| "learning_rate": 1.9363818623561565e-07, |
| "logits/chosen": -1.0548720359802246, |
| "logits/rejected": -1.0498396158218384, |
| "logps/chosen": -2.948056697845459, |
| "logps/rejected": -3.4283742904663086, |
| "loss": 3.2606, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -29.480567932128906, |
| "rewards/margins": 4.803174018859863, |
| "rewards/rejected": -34.28374099731445, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.7079445858071812, |
| "grad_norm": 144.53325872542644, |
| "learning_rate": 1.9093310596023108e-07, |
| "logits/chosen": -1.0383893251419067, |
| "logits/rejected": -1.0171747207641602, |
| "logps/chosen": -2.96266770362854, |
| "logps/rejected": -3.5945122241973877, |
| "loss": 2.5371, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -29.626678466796875, |
| "rewards/margins": 6.318445205688477, |
| "rewards/rejected": -35.94512176513672, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.7102063895957026, |
| "grad_norm": 159.839020331253, |
| "learning_rate": 1.8824111755631274e-07, |
| "logits/chosen": -1.09993577003479, |
| "logits/rejected": -1.0983682870864868, |
| "logps/chosen": -2.958811044692993, |
| "logps/rejected": -3.3868050575256348, |
| "loss": 3.5182, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -29.588109970092773, |
| "rewards/margins": 4.279940128326416, |
| "rewards/rejected": -33.8680534362793, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.712468193384224, |
| "grad_norm": 173.12178435051865, |
| "learning_rate": 1.8556238959749457e-07, |
| "logits/chosen": -1.0669752359390259, |
| "logits/rejected": -1.058374047279358, |
| "logps/chosen": -3.1393425464630127, |
| "logps/rejected": -3.544556140899658, |
| "loss": 3.4766, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -31.3934268951416, |
| "rewards/margins": 4.052134990692139, |
| "rewards/rejected": -35.445560455322266, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.7147299971727452, |
| "grad_norm": 163.63036158743895, |
| "learning_rate": 1.8289708982703562e-07, |
| "logits/chosen": -1.0433309078216553, |
| "logits/rejected": -1.026186466217041, |
| "logps/chosen": -2.943878650665283, |
| "logps/rejected": -3.444617748260498, |
| "loss": 3.506, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -29.438785552978516, |
| "rewards/margins": 5.0073957443237305, |
| "rewards/rejected": -34.4461784362793, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.7169918009612666, |
| "grad_norm": 174.9260498692663, |
| "learning_rate": 1.802453851473151e-07, |
| "logits/chosen": -1.102484107017517, |
| "logits/rejected": -1.1031461954116821, |
| "logps/chosen": -3.222090005874634, |
| "logps/rejected": -3.821988582611084, |
| "loss": 2.6827, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -32.22090148925781, |
| "rewards/margins": 5.9989824295043945, |
| "rewards/rejected": -38.219886779785156, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.719253604749788, |
| "grad_norm": 160.37374943304434, |
| "learning_rate": 1.7760744160938093e-07, |
| "logits/chosen": -1.0474447011947632, |
| "logits/rejected": -1.031981348991394, |
| "logps/chosen": -3.0827102661132812, |
| "logps/rejected": -3.7794246673583984, |
| "loss": 2.3634, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -30.827098846435547, |
| "rewards/margins": 6.967146396636963, |
| "rewards/rejected": -37.794246673583984, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.7215154085383093, |
| "grad_norm": 130.2366582324951, |
| "learning_rate": 1.7498342440255135e-07, |
| "logits/chosen": -1.110652208328247, |
| "logits/rejected": -1.092280626296997, |
| "logps/chosen": -2.9385170936584473, |
| "logps/rejected": -3.447098970413208, |
| "loss": 2.7639, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -29.385169982910156, |
| "rewards/margins": 5.085820198059082, |
| "rewards/rejected": -34.47098922729492, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.7237772123268307, |
| "grad_norm": 137.53735259891135, |
| "learning_rate": 1.7237349784407115e-07, |
| "logits/chosen": -1.098408818244934, |
| "logits/rejected": -1.087859869003296, |
| "logps/chosen": -3.0967419147491455, |
| "logps/rejected": -3.6063919067382812, |
| "loss": 3.0231, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -30.967418670654297, |
| "rewards/margins": 5.096498489379883, |
| "rewards/rejected": -36.06391525268555, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.726039016115352, |
| "grad_norm": 157.85568588838194, |
| "learning_rate": 1.6977782536882178e-07, |
| "logits/chosen": -1.055006742477417, |
| "logits/rejected": -1.057015299797058, |
| "logps/chosen": -2.8752079010009766, |
| "logps/rejected": -3.4953298568725586, |
| "loss": 2.5631, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -28.7520751953125, |
| "rewards/margins": 6.2012176513671875, |
| "rewards/rejected": -34.95329284667969, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.7283008199038733, |
| "grad_norm": 167.22358095522708, |
| "learning_rate": 1.6719656951908708e-07, |
| "logits/chosen": -1.03927481174469, |
| "logits/rejected": -1.0443062782287598, |
| "logps/chosen": -2.6960246562957764, |
| "logps/rejected": -3.2267005443573, |
| "loss": 2.8958, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -26.960247039794922, |
| "rewards/margins": 5.306758880615234, |
| "rewards/rejected": -32.267005920410156, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.7305626236923947, |
| "grad_norm": 141.43318236850038, |
| "learning_rate": 1.6462989193437453e-07, |
| "logits/chosen": -1.1290605068206787, |
| "logits/rejected": -1.1231218576431274, |
| "logps/chosen": -2.9903295040130615, |
| "logps/rejected": -3.4813930988311768, |
| "loss": 3.0688, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -29.903291702270508, |
| "rewards/margins": 4.9106364250183105, |
| "rewards/rejected": -34.813934326171875, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.732824427480916, |
| "grad_norm": 153.5405476048427, |
| "learning_rate": 1.6207795334129365e-07, |
| "logits/chosen": -1.0967392921447754, |
| "logits/rejected": -1.0955842733383179, |
| "logps/chosen": -3.0994861125946045, |
| "logps/rejected": -3.6565637588500977, |
| "loss": 3.0762, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -30.99485969543457, |
| "rewards/margins": 5.570777893066406, |
| "rewards/rejected": -36.56563949584961, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7350862312694374, |
| "grad_norm": 172.69716391444982, |
| "learning_rate": 1.5954091354349121e-07, |
| "logits/chosen": -1.1257866621017456, |
| "logits/rejected": -1.1157145500183105, |
| "logps/chosen": -3.0154027938842773, |
| "logps/rejected": -3.544265031814575, |
| "loss": 2.8422, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -30.154027938842773, |
| "rewards/margins": 5.288622856140137, |
| "rewards/rejected": -35.442649841308594, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7373480350579588, |
| "grad_norm": 177.90906140786845, |
| "learning_rate": 1.5701893141164364e-07, |
| "logits/chosen": -1.1034339666366577, |
| "logits/rejected": -1.10421621799469, |
| "logps/chosen": -3.1780266761779785, |
| "logps/rejected": -3.7761831283569336, |
| "loss": 3.7696, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -31.78026580810547, |
| "rewards/margins": 5.981565475463867, |
| "rewards/rejected": -37.7618293762207, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.73960983884648, |
| "grad_norm": 183.54016496777058, |
| "learning_rate": 1.545121648735093e-07, |
| "logits/chosen": -1.1025980710983276, |
| "logits/rejected": -1.0847091674804688, |
| "logps/chosen": -3.075312614440918, |
| "logps/rejected": -3.548532009124756, |
| "loss": 3.2151, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -30.75312614440918, |
| "rewards/margins": 4.732194423675537, |
| "rewards/rejected": -35.485321044921875, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7418716426350014, |
| "grad_norm": 135.25748433449712, |
| "learning_rate": 1.5202077090403863e-07, |
| "logits/chosen": -1.1285474300384521, |
| "logits/rejected": -1.1008970737457275, |
| "logps/chosen": -2.8346710205078125, |
| "logps/rejected": -3.3540940284729004, |
| "loss": 2.5201, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -28.34670639038086, |
| "rewards/margins": 5.1942338943481445, |
| "rewards/rejected": -33.54093933105469, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7441334464235227, |
| "grad_norm": 163.3852037175944, |
| "learning_rate": 1.495449055155443e-07, |
| "logits/chosen": -1.11967134475708, |
| "logits/rejected": -1.1241058111190796, |
| "logps/chosen": -3.2236409187316895, |
| "logps/rejected": -3.8717517852783203, |
| "loss": 2.5778, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -32.23640823364258, |
| "rewards/margins": 6.481108665466309, |
| "rewards/rejected": -38.7175178527832, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.7463952502120441, |
| "grad_norm": 189.7340951008119, |
| "learning_rate": 1.4708472374793112e-07, |
| "logits/chosen": -1.0307663679122925, |
| "logits/rejected": -1.0201297998428345, |
| "logps/chosen": -3.2278249263763428, |
| "logps/rejected": -3.6534264087677, |
| "loss": 3.6326, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -32.27824783325195, |
| "rewards/margins": 4.256016254425049, |
| "rewards/rejected": -36.534263610839844, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7486570540005655, |
| "grad_norm": 162.72584520699186, |
| "learning_rate": 1.4464037965898878e-07, |
| "logits/chosen": -1.0099622011184692, |
| "logits/rejected": -0.9994704127311707, |
| "logps/chosen": -2.914703369140625, |
| "logps/rejected": -3.456731081008911, |
| "loss": 2.8021, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -29.147035598754883, |
| "rewards/margins": 5.4202775955200195, |
| "rewards/rejected": -34.56731033325195, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7509188577890868, |
| "grad_norm": 171.09208924948626, |
| "learning_rate": 1.4221202631474282e-07, |
| "logits/chosen": -1.036645770072937, |
| "logits/rejected": -1.047488808631897, |
| "logps/chosen": -3.04544734954834, |
| "logps/rejected": -3.528249502182007, |
| "loss": 3.1024, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -30.4544677734375, |
| "rewards/margins": 4.828027725219727, |
| "rewards/rejected": -35.28249740600586, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7531806615776081, |
| "grad_norm": 170.6804908176681, |
| "learning_rate": 1.3979981577987113e-07, |
| "logits/chosen": -1.0796502828598022, |
| "logits/rejected": -1.0737544298171997, |
| "logps/chosen": -2.974057197570801, |
| "logps/rejected": -3.5673232078552246, |
| "loss": 2.4609, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -29.740570068359375, |
| "rewards/margins": 5.932661533355713, |
| "rewards/rejected": -35.6732292175293, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7554424653661295, |
| "grad_norm": 157.68396676428247, |
| "learning_rate": 1.374038991081807e-07, |
| "logits/chosen": -1.0844019651412964, |
| "logits/rejected": -1.0833029747009277, |
| "logps/chosen": -3.113481044769287, |
| "logps/rejected": -3.539191484451294, |
| "loss": 2.9737, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -31.134809494018555, |
| "rewards/margins": 4.257106781005859, |
| "rewards/rejected": -35.39191436767578, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7577042691546508, |
| "grad_norm": 159.54123794673237, |
| "learning_rate": 1.3502442633314882e-07, |
| "logits/chosen": -1.08342707157135, |
| "logits/rejected": -1.0705313682556152, |
| "logps/chosen": -2.7244319915771484, |
| "logps/rejected": -3.2218587398529053, |
| "loss": 2.6449, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -27.244319915771484, |
| "rewards/margins": 4.97426700592041, |
| "rewards/rejected": -32.218589782714844, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7599660729431722, |
| "grad_norm": 139.57722381793081, |
| "learning_rate": 1.3266154645852815e-07, |
| "logits/chosen": -1.058245062828064, |
| "logits/rejected": -1.0414983034133911, |
| "logps/chosen": -3.0073535442352295, |
| "logps/rejected": -3.5276637077331543, |
| "loss": 2.7813, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -30.073535919189453, |
| "rewards/margins": 5.20310115814209, |
| "rewards/rejected": -35.27663803100586, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7622278767316936, |
| "grad_norm": 174.03982323832386, |
| "learning_rate": 1.303154074490152e-07, |
| "logits/chosen": -1.1049643754959106, |
| "logits/rejected": -1.083824872970581, |
| "logps/chosen": -3.008976697921753, |
| "logps/rejected": -3.5545217990875244, |
| "loss": 3.0701, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -30.089771270751953, |
| "rewards/margins": 5.455449104309082, |
| "rewards/rejected": -35.54521942138672, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7644896805202148, |
| "grad_norm": 160.9242319744395, |
| "learning_rate": 1.2798615622098616e-07, |
| "logits/chosen": -1.118033766746521, |
| "logits/rejected": -1.104003667831421, |
| "logps/chosen": -2.9770729541778564, |
| "logps/rejected": -3.559138536453247, |
| "loss": 2.7528, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -29.770729064941406, |
| "rewards/margins": 5.820652961730957, |
| "rewards/rejected": -35.59138107299805, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7667514843087362, |
| "grad_norm": 121.06171902418025, |
| "learning_rate": 1.2567393863329523e-07, |
| "logits/chosen": -1.0702447891235352, |
| "logits/rejected": -1.092645287513733, |
| "logps/chosen": -2.9810526371002197, |
| "logps/rejected": -3.5818190574645996, |
| "loss": 2.5609, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -29.810523986816406, |
| "rewards/margins": 6.00766658782959, |
| "rewards/rejected": -35.81819152832031, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7690132880972576, |
| "grad_norm": 170.89823033285558, |
| "learning_rate": 1.233788994781423e-07, |
| "logits/chosen": -1.1062794923782349, |
| "logits/rejected": -1.1038752794265747, |
| "logps/chosen": -3.008683443069458, |
| "logps/rejected": -3.5800118446350098, |
| "loss": 2.6117, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -30.086833953857422, |
| "rewards/margins": 5.713282585144043, |
| "rewards/rejected": -35.800113677978516, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7712750918857789, |
| "grad_norm": 131.00188496624259, |
| "learning_rate": 1.2110118247200468e-07, |
| "logits/chosen": -1.093713641166687, |
| "logits/rejected": -1.0842986106872559, |
| "logps/chosen": -2.8371694087982178, |
| "logps/rejected": -3.4009671211242676, |
| "loss": 2.5132, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -28.371692657470703, |
| "rewards/margins": 5.637977123260498, |
| "rewards/rejected": -34.00967025756836, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7735368956743003, |
| "grad_norm": 193.45548191250376, |
| "learning_rate": 1.1884093024663933e-07, |
| "logits/chosen": -1.0978885889053345, |
| "logits/rejected": -1.096379041671753, |
| "logps/chosen": -2.7560176849365234, |
| "logps/rejected": -3.429332971572876, |
| "loss": 3.0091, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -27.560178756713867, |
| "rewards/margins": 6.733152389526367, |
| "rewards/rejected": -34.293331146240234, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7757986994628217, |
| "grad_norm": 179.19045707900517, |
| "learning_rate": 1.1659828434014886e-07, |
| "logits/chosen": -1.0911431312561035, |
| "logits/rejected": -1.0719244480133057, |
| "logps/chosen": -2.8892617225646973, |
| "logps/rejected": -3.54764461517334, |
| "loss": 2.8438, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -28.892620086669922, |
| "rewards/margins": 6.583826065063477, |
| "rewards/rejected": -35.47644805908203, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.7780605032513429, |
| "grad_norm": 161.1666542002096, |
| "learning_rate": 1.143733851881203e-07, |
| "logits/chosen": -1.1230119466781616, |
| "logits/rejected": -1.1065000295639038, |
| "logps/chosen": -3.1044983863830566, |
| "logps/rejected": -3.6987762451171875, |
| "loss": 3.0252, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -31.044984817504883, |
| "rewards/margins": 5.942776203155518, |
| "rewards/rejected": -36.987762451171875, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7803223070398643, |
| "grad_norm": 162.875371762857, |
| "learning_rate": 1.1216637211483005e-07, |
| "logits/chosen": -1.0864002704620361, |
| "logits/rejected": -1.076468586921692, |
| "logps/chosen": -2.995060920715332, |
| "logps/rejected": -3.4805409908294678, |
| "loss": 3.1132, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -29.95060920715332, |
| "rewards/margins": 4.85480260848999, |
| "rewards/rejected": -34.80541229248047, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7825841108283856, |
| "grad_norm": 211.00125843356489, |
| "learning_rate": 1.0997738332451936e-07, |
| "logits/chosen": -1.0667786598205566, |
| "logits/rejected": -1.0570969581604004, |
| "logps/chosen": -3.2074437141418457, |
| "logps/rejected": -3.686741352081299, |
| "loss": 2.8699, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -32.074440002441406, |
| "rewards/margins": 4.792973041534424, |
| "rewards/rejected": -36.86741256713867, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.784845914616907, |
| "grad_norm": 157.4005395746414, |
| "learning_rate": 1.0780655589274031e-07, |
| "logits/chosen": -1.1231722831726074, |
| "logits/rejected": -1.101925253868103, |
| "logps/chosen": -3.014099359512329, |
| "logps/rejected": -3.558246374130249, |
| "loss": 2.693, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -30.1409912109375, |
| "rewards/margins": 5.441472053527832, |
| "rewards/rejected": -35.582462310791016, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7871077184054284, |
| "grad_norm": 165.93325016524315, |
| "learning_rate": 1.056540257577712e-07, |
| "logits/chosen": -1.0740177631378174, |
| "logits/rejected": -1.0665497779846191, |
| "logps/chosen": -3.4591240882873535, |
| "logps/rejected": -4.057380676269531, |
| "loss": 2.6869, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -34.59123992919922, |
| "rewards/margins": 5.982567310333252, |
| "rewards/rejected": -40.57380676269531, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7893695221939496, |
| "grad_norm": 147.55749046925172, |
| "learning_rate": 1.0351992771210554e-07, |
| "logits/chosen": -1.0476750135421753, |
| "logits/rejected": -1.0461106300354004, |
| "logps/chosen": -3.0786919593811035, |
| "logps/rejected": -3.583596706390381, |
| "loss": 2.8979, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -30.786916732788086, |
| "rewards/margins": 5.049046993255615, |
| "rewards/rejected": -35.83596420288086, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.791631325982471, |
| "grad_norm": 193.43103769575845, |
| "learning_rate": 1.0140439539400953e-07, |
| "logits/chosen": -1.0643444061279297, |
| "logits/rejected": -1.0696581602096558, |
| "logps/chosen": -3.1182804107666016, |
| "logps/rejected": -3.620661735534668, |
| "loss": 3.5198, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -31.182804107666016, |
| "rewards/margins": 5.023812770843506, |
| "rewards/rejected": -36.20661926269531, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7938931297709924, |
| "grad_norm": 141.88482331317974, |
| "learning_rate": 9.930756127915488e-08, |
| "logits/chosen": -1.0582480430603027, |
| "logits/rejected": -1.0691368579864502, |
| "logps/chosen": -2.9359114170074463, |
| "logps/rejected": -3.5403919219970703, |
| "loss": 2.7303, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -29.359111785888672, |
| "rewards/margins": 6.044802665710449, |
| "rewards/rejected": -35.4039192199707, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.7961549335595137, |
| "grad_norm": 176.14309347354188, |
| "learning_rate": 9.722955667232242e-08, |
| "logits/chosen": -1.103529453277588, |
| "logits/rejected": -1.1015864610671997, |
| "logps/chosen": -3.259512424468994, |
| "logps/rejected": -3.5993950366973877, |
| "loss": 3.9747, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -32.59511947631836, |
| "rewards/margins": 3.398827075958252, |
| "rewards/rejected": -35.99394989013672, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7984167373480351, |
| "grad_norm": 161.52973237898686, |
| "learning_rate": 9.517051169918016e-08, |
| "logits/chosen": -1.1242177486419678, |
| "logits/rejected": -1.131679892539978, |
| "logps/chosen": -3.072510004043579, |
| "logps/rejected": -3.5460586547851562, |
| "loss": 3.0567, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -30.725101470947266, |
| "rewards/margins": 4.735486030578613, |
| "rewards/rejected": -35.46058654785156, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.8006785411365565, |
| "grad_norm": 173.94582839965446, |
| "learning_rate": 9.313055529813412e-08, |
| "logits/chosen": -1.0249019861221313, |
| "logits/rejected": -1.0524556636810303, |
| "logps/chosen": -2.8515758514404297, |
| "logps/rejected": -3.4208881855010986, |
| "loss": 2.6105, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -28.51576042175293, |
| "rewards/margins": 5.693122863769531, |
| "rewards/rejected": -34.20888137817383, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.8029403449250777, |
| "grad_norm": 186.23957915125598, |
| "learning_rate": 9.110981521225532e-08, |
| "logits/chosen": -1.100682258605957, |
| "logits/rejected": -1.080102801322937, |
| "logps/chosen": -3.1016242504119873, |
| "logps/rejected": -3.5042476654052734, |
| "loss": 3.6117, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -31.01624298095703, |
| "rewards/margins": 4.0262370109558105, |
| "rewards/rejected": -35.04248046875, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.8052021487135991, |
| "grad_norm": 179.43856596279534, |
| "learning_rate": 8.910841798127884e-08, |
| "logits/chosen": -1.0597988367080688, |
| "logits/rejected": -1.0726373195648193, |
| "logps/chosen": -3.047053813934326, |
| "logps/rejected": -3.5895328521728516, |
| "loss": 2.874, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -30.470539093017578, |
| "rewards/margins": 5.424790382385254, |
| "rewards/rejected": -35.89532470703125, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.8074639525021204, |
| "grad_norm": 161.8027690554887, |
| "learning_rate": 8.712648893368139e-08, |
| "logits/chosen": -1.0760971307754517, |
| "logits/rejected": -1.0981957912445068, |
| "logps/chosen": -3.037094831466675, |
| "logps/rejected": -3.6570444107055664, |
| "loss": 2.6992, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -30.370943069458008, |
| "rewards/margins": 6.199495315551758, |
| "rewards/rejected": -36.57044219970703, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.8097257562906418, |
| "grad_norm": 136.66722952170457, |
| "learning_rate": 8.516415217883186e-08, |
| "logits/chosen": -1.0790215730667114, |
| "logits/rejected": -1.0539416074752808, |
| "logps/chosen": -2.8489599227905273, |
| "logps/rejected": -3.426215648651123, |
| "loss": 3.0279, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -28.489595413208008, |
| "rewards/margins": 5.772557735443115, |
| "rewards/rejected": -34.26215362548828, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.8119875600791632, |
| "grad_norm": 183.0413049106842, |
| "learning_rate": 8.32215305992209e-08, |
| "logits/chosen": -1.1424682140350342, |
| "logits/rejected": -1.1471257209777832, |
| "logps/chosen": -2.8472583293914795, |
| "logps/rejected": -3.4035425186157227, |
| "loss": 2.7101, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -28.472583770751953, |
| "rewards/margins": 5.562839984893799, |
| "rewards/rejected": -34.035423278808594, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.8142493638676844, |
| "grad_norm": 130.5926688418937, |
| "learning_rate": 8.129874584276448e-08, |
| "logits/chosen": -1.0832806825637817, |
| "logits/rejected": -1.0744616985321045, |
| "logps/chosen": -2.8213050365448, |
| "logps/rejected": -3.391476631164551, |
| "loss": 2.6731, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -28.213050842285156, |
| "rewards/margins": 5.701716423034668, |
| "rewards/rejected": -33.91476821899414, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8165111676562058, |
| "grad_norm": 170.2145280077911, |
| "learning_rate": 7.939591831518746e-08, |
| "logits/chosen": -1.093398928642273, |
| "logits/rejected": -1.094681739807129, |
| "logps/chosen": -2.789719581604004, |
| "logps/rejected": -3.1508536338806152, |
| "loss": 3.2504, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -27.897193908691406, |
| "rewards/margins": 3.611339807510376, |
| "rewards/rejected": -31.508529663085938, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.8187729714447272, |
| "grad_norm": 148.16847439682329, |
| "learning_rate": 7.751316717248304e-08, |
| "logits/chosen": -1.0858169794082642, |
| "logits/rejected": -1.0891151428222656, |
| "logps/chosen": -3.0676655769348145, |
| "logps/rejected": -3.830179214477539, |
| "loss": 2.2728, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -30.676654815673828, |
| "rewards/margins": 7.625136852264404, |
| "rewards/rejected": -38.30179214477539, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.8210347752332485, |
| "grad_norm": 161.20986921430406, |
| "learning_rate": 7.565061031345142e-08, |
| "logits/chosen": -1.0509235858917236, |
| "logits/rejected": -1.0490310192108154, |
| "logps/chosen": -3.1492578983306885, |
| "logps/rejected": -3.728961944580078, |
| "loss": 2.884, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -31.492576599121094, |
| "rewards/margins": 5.79704475402832, |
| "rewards/rejected": -37.28961944580078, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.8232965790217699, |
| "grad_norm": 150.85257421881246, |
| "learning_rate": 7.380836437231686e-08, |
| "logits/chosen": -1.0766702890396118, |
| "logits/rejected": -1.0677735805511475, |
| "logps/chosen": -2.8776063919067383, |
| "logps/rejected": -3.4943623542785645, |
| "loss": 2.4755, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -28.77606201171875, |
| "rewards/margins": 6.167560577392578, |
| "rewards/rejected": -34.94362258911133, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.8255583828102913, |
| "grad_norm": 149.72523598721824, |
| "learning_rate": 7.198654471142371e-08, |
| "logits/chosen": -1.0955713987350464, |
| "logits/rejected": -1.0762622356414795, |
| "logps/chosen": -2.9110095500946045, |
| "logps/rejected": -3.5544984340667725, |
| "loss": 2.7029, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -29.11009407043457, |
| "rewards/margins": 6.43488883972168, |
| "rewards/rejected": -35.54498291015625, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.8278201865988125, |
| "grad_norm": 245.9741650955417, |
| "learning_rate": 7.01852654140132e-08, |
| "logits/chosen": -1.1138908863067627, |
| "logits/rejected": -1.1113349199295044, |
| "logps/chosen": -3.091291666030884, |
| "logps/rejected": -3.6368563175201416, |
| "loss": 3.0739, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -30.91291618347168, |
| "rewards/margins": 5.45564603805542, |
| "rewards/rejected": -36.36856460571289, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.8300819903873339, |
| "grad_norm": 155.74801290743875, |
| "learning_rate": 6.840463927707833e-08, |
| "logits/chosen": -1.0664961338043213, |
| "logits/rejected": -1.080258846282959, |
| "logps/chosen": -3.084540367126465, |
| "logps/rejected": -3.6539242267608643, |
| "loss": 2.5649, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -30.84540367126465, |
| "rewards/margins": 5.6938371658325195, |
| "rewards/rejected": -36.539241790771484, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.8323437941758552, |
| "grad_norm": 151.84768757368855, |
| "learning_rate": 6.664477780430138e-08, |
| "logits/chosen": -1.0625722408294678, |
| "logits/rejected": -1.0663492679595947, |
| "logps/chosen": -3.0569539070129395, |
| "logps/rejected": -3.5362274646759033, |
| "loss": 3.1163, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -30.56954002380371, |
| "rewards/margins": 4.792736530303955, |
| "rewards/rejected": -35.36227798461914, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.8346055979643766, |
| "grad_norm": 171.73774525120407, |
| "learning_rate": 6.49057911990711e-08, |
| "logits/chosen": -1.0828893184661865, |
| "logits/rejected": -1.0686218738555908, |
| "logps/chosen": -3.017665386199951, |
| "logps/rejected": -3.5277650356292725, |
| "loss": 3.1351, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -30.176654815673828, |
| "rewards/margins": 5.1009955406188965, |
| "rewards/rejected": -35.27764892578125, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.836867401752898, |
| "grad_norm": 145.4377135801225, |
| "learning_rate": 6.318778835758189e-08, |
| "logits/chosen": -1.0982502698898315, |
| "logits/rejected": -1.0948173999786377, |
| "logps/chosen": -3.06710147857666, |
| "logps/rejected": -3.6917946338653564, |
| "loss": 2.3441, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -30.67101287841797, |
| "rewards/margins": 6.2469329833984375, |
| "rewards/rejected": -36.917945861816406, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8391292055414192, |
| "grad_norm": 197.55910952033693, |
| "learning_rate": 6.149087686201433e-08, |
| "logits/chosen": -1.1107332706451416, |
| "logits/rejected": -1.1229714155197144, |
| "logps/chosen": -2.962777853012085, |
| "logps/rejected": -3.3359103202819824, |
| "loss": 3.9932, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -29.62778091430664, |
| "rewards/margins": 3.731322765350342, |
| "rewards/rejected": -33.359107971191406, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.8413910093299406, |
| "grad_norm": 155.21912723349024, |
| "learning_rate": 5.98151629737988e-08, |
| "logits/chosen": -1.1003910303115845, |
| "logits/rejected": -1.0755306482315063, |
| "logps/chosen": -3.033722400665283, |
| "logps/rejected": -3.64453387260437, |
| "loss": 2.9219, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -30.33721923828125, |
| "rewards/margins": 6.10811710357666, |
| "rewards/rejected": -36.445335388183594, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.843652813118462, |
| "grad_norm": 141.87616811965896, |
| "learning_rate": 5.816075162696097e-08, |
| "logits/chosen": -1.1110835075378418, |
| "logits/rejected": -1.1089603900909424, |
| "logps/chosen": -2.8409385681152344, |
| "logps/rejected": -3.3667705059051514, |
| "loss": 2.3721, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -28.40938377380371, |
| "rewards/margins": 5.2583208084106445, |
| "rewards/rejected": -33.66770553588867, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.8459146169069833, |
| "grad_norm": 127.68193628951124, |
| "learning_rate": 5.6527746421551046e-08, |
| "logits/chosen": -1.0517830848693848, |
| "logits/rejected": -1.0378973484039307, |
| "logps/chosen": -2.8996520042419434, |
| "logps/rejected": -3.4401497840881348, |
| "loss": 2.9538, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -28.99652099609375, |
| "rewards/margins": 5.4049787521362305, |
| "rewards/rejected": -34.40149688720703, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8481764206955047, |
| "grad_norm": 147.04552648138863, |
| "learning_rate": 5.4916249617156064e-08, |
| "logits/chosen": -1.077235460281372, |
| "logits/rejected": -1.0792851448059082, |
| "logps/chosen": -2.7157163619995117, |
| "logps/rejected": -3.2315173149108887, |
| "loss": 2.8498, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -27.15716552734375, |
| "rewards/margins": 5.158007621765137, |
| "rewards/rejected": -32.31517028808594, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8504382244840261, |
| "grad_norm": 136.93878767480692, |
| "learning_rate": 5.332636212649646e-08, |
| "logits/chosen": -1.07993745803833, |
| "logits/rejected": -1.0671964883804321, |
| "logps/chosen": -2.8164620399475098, |
| "logps/rejected": -3.303701877593994, |
| "loss": 2.8001, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -28.16461944580078, |
| "rewards/margins": 4.872396469116211, |
| "rewards/rejected": -33.037017822265625, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8527000282725473, |
| "grad_norm": 138.27817441233987, |
| "learning_rate": 5.17581835091069e-08, |
| "logits/chosen": -1.0522788763046265, |
| "logits/rejected": -1.0637288093566895, |
| "logps/chosen": -2.8656744956970215, |
| "logps/rejected": -3.3920047283172607, |
| "loss": 3.0323, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -28.65674591064453, |
| "rewards/margins": 5.263302326202393, |
| "rewards/rejected": -33.9200439453125, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.8549618320610687, |
| "grad_norm": 136.54322095828988, |
| "learning_rate": 5.02118119651016e-08, |
| "logits/chosen": -1.1301465034484863, |
| "logits/rejected": -1.130077600479126, |
| "logps/chosen": -2.8762614727020264, |
| "logps/rejected": -3.4399282932281494, |
| "loss": 2.6643, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -28.762617111206055, |
| "rewards/margins": 5.636669158935547, |
| "rewards/rejected": -34.39928436279297, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.85722363584959, |
| "grad_norm": 159.79319397102407, |
| "learning_rate": 4.868734432902526e-08, |
| "logits/chosen": -1.1377651691436768, |
| "logits/rejected": -1.1213501691818237, |
| "logps/chosen": -2.9207286834716797, |
| "logps/rejected": -3.5463762283325195, |
| "loss": 3.1626, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -29.207284927368164, |
| "rewards/margins": 6.256474494934082, |
| "rewards/rejected": -35.46376037597656, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8594854396381114, |
| "grad_norm": 206.34365826921854, |
| "learning_rate": 4.7184876063789134e-08, |
| "logits/chosen": -1.1172423362731934, |
| "logits/rejected": -1.118281602859497, |
| "logps/chosen": -2.6622347831726074, |
| "logps/rejected": -3.1349706649780273, |
| "loss": 2.9815, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -26.62234878540039, |
| "rewards/margins": 4.727357864379883, |
| "rewards/rejected": -31.349702835083008, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8617472434266328, |
| "grad_norm": 202.3644549275693, |
| "learning_rate": 4.570450125469314e-08, |
| "logits/chosen": -1.0802032947540283, |
| "logits/rejected": -1.0645304918289185, |
| "logps/chosen": -2.9399185180664062, |
| "logps/rejected": -3.5755248069763184, |
| "loss": 2.5091, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -29.399185180664062, |
| "rewards/margins": 6.356063365936279, |
| "rewards/rejected": -35.7552490234375, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.864009047215154, |
| "grad_norm": 142.61443241131124, |
| "learning_rate": 4.424631260353378e-08, |
| "logits/chosen": -1.1050821542739868, |
| "logits/rejected": -1.0963554382324219, |
| "logps/chosen": -2.7791900634765625, |
| "logps/rejected": -3.280860424041748, |
| "loss": 3.077, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -27.79190444946289, |
| "rewards/margins": 5.016700744628906, |
| "rewards/rejected": -32.80860137939453, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8662708510036754, |
| "grad_norm": 117.70172449775342, |
| "learning_rate": 4.281040142280008e-08, |
| "logits/chosen": -1.1450071334838867, |
| "logits/rejected": -1.1387488842010498, |
| "logps/chosen": -2.701590061187744, |
| "logps/rejected": -3.290010929107666, |
| "loss": 2.0548, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -27.015897750854492, |
| "rewards/margins": 5.884207725524902, |
| "rewards/rejected": -32.90010452270508, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.8685326547921968, |
| "grad_norm": 204.206206294938, |
| "learning_rate": 4.1396857629954286e-08, |
| "logits/chosen": -1.1101422309875488, |
| "logits/rejected": -1.0988112688064575, |
| "logps/chosen": -3.2335309982299805, |
| "logps/rejected": -3.733260154724121, |
| "loss": 3.0684, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -32.33530807495117, |
| "rewards/margins": 4.997293472290039, |
| "rewards/rejected": -37.332603454589844, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8707944585807181, |
| "grad_norm": 153.54162168949213, |
| "learning_rate": 4.000576974180232e-08, |
| "logits/chosen": -1.0964728593826294, |
| "logits/rejected": -1.1033198833465576, |
| "logps/chosen": -2.8175177574157715, |
| "logps/rejected": -3.2958943843841553, |
| "loss": 3.1353, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -28.17517852783203, |
| "rewards/margins": 4.7837677001953125, |
| "rewards/rejected": -32.958946228027344, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8730562623692395, |
| "grad_norm": 172.21965212484756, |
| "learning_rate": 3.8637224868950066e-08, |
| "logits/chosen": -1.0778682231903076, |
| "logits/rejected": -1.0855255126953125, |
| "logps/chosen": -2.8457837104797363, |
| "logps/rejected": -3.306011438369751, |
| "loss": 3.127, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -28.457834243774414, |
| "rewards/margins": 4.60227632522583, |
| "rewards/rejected": -33.060115814208984, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8753180661577609, |
| "grad_norm": 155.4189818710374, |
| "learning_rate": 3.729130871034885e-08, |
| "logits/chosen": -1.1040070056915283, |
| "logits/rejected": -1.0997991561889648, |
| "logps/chosen": -2.8627209663391113, |
| "logps/rejected": -3.4421796798706055, |
| "loss": 2.5368, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -28.62721061706543, |
| "rewards/margins": 5.794586181640625, |
| "rewards/rejected": -34.42179489135742, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8775798699462821, |
| "grad_norm": 154.8763399842207, |
| "learning_rate": 3.596810554792888e-08, |
| "logits/chosen": -1.1059839725494385, |
| "logits/rejected": -1.1149722337722778, |
| "logps/chosen": -2.8257861137390137, |
| "logps/rejected": -3.35601544380188, |
| "loss": 2.9665, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -28.25786018371582, |
| "rewards/margins": 5.3022918701171875, |
| "rewards/rejected": -33.560150146484375, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8798416737348035, |
| "grad_norm": 147.65054864467723, |
| "learning_rate": 3.466769824132116e-08, |
| "logits/chosen": -1.0984127521514893, |
| "logits/rejected": -1.0669302940368652, |
| "logps/chosen": -2.8397090435028076, |
| "logps/rejected": -3.374936819076538, |
| "loss": 2.8977, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -28.397090911865234, |
| "rewards/margins": 5.352276802062988, |
| "rewards/rejected": -33.749366760253906, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8821034775233249, |
| "grad_norm": 157.74667917718264, |
| "learning_rate": 3.339016822266925e-08, |
| "logits/chosen": -1.0509486198425293, |
| "logits/rejected": -1.062030553817749, |
| "logps/chosen": -2.9279890060424805, |
| "logps/rejected": -3.5987658500671387, |
| "loss": 2.1703, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -29.279890060424805, |
| "rewards/margins": 6.707767963409424, |
| "rewards/rejected": -35.9876594543457, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8843652813118462, |
| "grad_norm": 146.8349228995068, |
| "learning_rate": 3.213559549152958e-08, |
| "logits/chosen": -1.1121222972869873, |
| "logits/rejected": -1.093271255493164, |
| "logps/chosen": -2.754055976867676, |
| "logps/rejected": -3.3426716327667236, |
| "loss": 2.8307, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -27.54056167602539, |
| "rewards/margins": 5.886153697967529, |
| "rewards/rejected": -33.42671203613281, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8866270851003676, |
| "grad_norm": 137.10119291122675, |
| "learning_rate": 3.090405860986203e-08, |
| "logits/chosen": -1.1325721740722656, |
| "logits/rejected": -1.15884268283844, |
| "logps/chosen": -2.919574022293091, |
| "logps/rejected": -3.656275987625122, |
| "loss": 2.3372, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -29.19573974609375, |
| "rewards/margins": 7.367020130157471, |
| "rewards/rejected": -36.56275939941406, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 157.3640886436071, |
| "learning_rate": 2.9695634697110315e-08, |
| "logits/chosen": -1.056593418121338, |
| "logits/rejected": -1.0579760074615479, |
| "logps/chosen": -2.707357883453369, |
| "logps/rejected": -3.351743459701538, |
| "loss": 2.8028, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -27.073579788208008, |
| "rewards/margins": 6.443853378295898, |
| "rewards/rejected": -33.517433166503906, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8911506926774102, |
| "grad_norm": 167.08718168939666, |
| "learning_rate": 2.8510399425372766e-08, |
| "logits/chosen": -1.0880804061889648, |
| "logits/rejected": -1.067001223564148, |
| "logps/chosen": -2.823213815689087, |
| "logps/rejected": -3.351870536804199, |
| "loss": 2.7972, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -28.23213768005371, |
| "rewards/margins": 5.286569595336914, |
| "rewards/rejected": -33.518707275390625, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.8934124964659316, |
| "grad_norm": 170.71135881911255, |
| "learning_rate": 2.734842701466329e-08, |
| "logits/chosen": -1.1137869358062744, |
| "logits/rejected": -1.0902773141860962, |
| "logps/chosen": -3.2399227619171143, |
| "logps/rejected": -3.800828456878662, |
| "loss": 2.6217, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -32.39922332763672, |
| "rewards/margins": 5.609059810638428, |
| "rewards/rejected": -38.00828552246094, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8956743002544529, |
| "grad_norm": 155.60384357389907, |
| "learning_rate": 2.6209790228264438e-08, |
| "logits/chosen": -1.1179192066192627, |
| "logits/rejected": -1.1079840660095215, |
| "logps/chosen": -2.8274612426757812, |
| "logps/rejected": -3.3131465911865234, |
| "loss": 2.8649, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -28.274608612060547, |
| "rewards/margins": 4.856854438781738, |
| "rewards/rejected": -33.131465911865234, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.8979361040429743, |
| "grad_norm": 158.73384494070228, |
| "learning_rate": 2.5094560368170305e-08, |
| "logits/chosen": -1.0657453536987305, |
| "logits/rejected": -1.0789343118667603, |
| "logps/chosen": -2.905332088470459, |
| "logps/rejected": -3.3988749980926514, |
| "loss": 2.7725, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -29.053321838378906, |
| "rewards/margins": 4.935429096221924, |
| "rewards/rejected": -33.98875427246094, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.9001979078314957, |
| "grad_norm": 132.85292741199737, |
| "learning_rate": 2.4002807270621893e-08, |
| "logits/chosen": -1.1158607006072998, |
| "logits/rejected": -1.1061642169952393, |
| "logps/chosen": -2.8107986450195312, |
| "logps/rejected": -3.386343240737915, |
| "loss": 2.4806, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -28.107988357543945, |
| "rewards/margins": 5.7554426193237305, |
| "rewards/rejected": -33.863433837890625, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.9024597116200169, |
| "grad_norm": 163.9414902624495, |
| "learning_rate": 2.293459930173354e-08, |
| "logits/chosen": -1.1135897636413574, |
| "logits/rejected": -1.1188251972198486, |
| "logps/chosen": -2.918743848800659, |
| "logps/rejected": -3.4281227588653564, |
| "loss": 2.8561, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -29.18743896484375, |
| "rewards/margins": 5.093789100646973, |
| "rewards/rejected": -34.281227111816406, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.9047215154085383, |
| "grad_norm": 159.34621159580047, |
| "learning_rate": 2.189000335321256e-08, |
| "logits/chosen": -1.0814203023910522, |
| "logits/rejected": -1.0605090856552124, |
| "logps/chosen": -2.842669725418091, |
| "logps/rejected": -3.3467047214508057, |
| "loss": 3.2137, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -28.42669677734375, |
| "rewards/margins": 5.040349006652832, |
| "rewards/rejected": -33.46704864501953, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9069833191970597, |
| "grad_norm": 176.47557033405067, |
| "learning_rate": 2.086908483816954e-08, |
| "logits/chosen": -1.1017359495162964, |
| "logits/rejected": -1.0888714790344238, |
| "logps/chosen": -3.1457722187042236, |
| "logps/rejected": -3.6218981742858887, |
| "loss": 3.2885, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -31.457719802856445, |
| "rewards/margins": 4.76125955581665, |
| "rewards/rejected": -36.2189826965332, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.909245122985581, |
| "grad_norm": 171.77739705704673, |
| "learning_rate": 1.9871907687022717e-08, |
| "logits/chosen": -1.103614091873169, |
| "logits/rejected": -1.0980620384216309, |
| "logps/chosen": -2.7797932624816895, |
| "logps/rejected": -3.2574329376220703, |
| "loss": 2.9828, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -27.79793357849121, |
| "rewards/margins": 4.776399612426758, |
| "rewards/rejected": -32.5743293762207, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.9115069267741024, |
| "grad_norm": 129.71420303866944, |
| "learning_rate": 1.889853434349451e-08, |
| "logits/chosen": -1.0566623210906982, |
| "logits/rejected": -1.0624772310256958, |
| "logps/chosen": -2.755463123321533, |
| "logps/rejected": -3.331778049468994, |
| "loss": 3.0728, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -27.55463409423828, |
| "rewards/margins": 5.76314640045166, |
| "rewards/rejected": -33.317779541015625, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.9137687305626236, |
| "grad_norm": 146.67389640495995, |
| "learning_rate": 1.7949025760701164e-08, |
| "logits/chosen": -1.0643901824951172, |
| "logits/rejected": -1.0577207803726196, |
| "logps/chosen": -2.9979794025421143, |
| "logps/rejected": -3.5035500526428223, |
| "loss": 2.8652, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -29.979793548583984, |
| "rewards/margins": 5.0557050704956055, |
| "rewards/rejected": -35.035499572753906, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.916030534351145, |
| "grad_norm": 145.46946091603365, |
| "learning_rate": 1.7023441397336023e-08, |
| "logits/chosen": -1.1220301389694214, |
| "logits/rejected": -1.1037752628326416, |
| "logps/chosen": -2.870546340942383, |
| "logps/rejected": -3.4554359912872314, |
| "loss": 2.4947, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -28.70546531677246, |
| "rewards/margins": 5.848890781402588, |
| "rewards/rejected": -34.55435562133789, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.9182923381396664, |
| "grad_norm": 147.2092751589586, |
| "learning_rate": 1.6121839213945854e-08, |
| "logits/chosen": -1.0786685943603516, |
| "logits/rejected": -1.0783849954605103, |
| "logps/chosen": -2.9078550338745117, |
| "logps/rejected": -3.536391258239746, |
| "loss": 3.0017, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -29.078548431396484, |
| "rewards/margins": 6.28536319732666, |
| "rewards/rejected": -35.36391067504883, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.9205541419281877, |
| "grad_norm": 186.25880993372996, |
| "learning_rate": 1.5244275669301777e-08, |
| "logits/chosen": -1.113109827041626, |
| "logits/rejected": -1.0987439155578613, |
| "logps/chosen": -2.944148302078247, |
| "logps/rejected": -3.5245351791381836, |
| "loss": 2.8001, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -29.44148063659668, |
| "rewards/margins": 5.8038716316223145, |
| "rewards/rejected": -35.24535369873047, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.9228159457167091, |
| "grad_norm": 178.1440969919412, |
| "learning_rate": 1.4390805716863398e-08, |
| "logits/chosen": -1.0935332775115967, |
| "logits/rejected": -1.094548225402832, |
| "logps/chosen": -2.868260383605957, |
| "logps/rejected": -3.3665590286254883, |
| "loss": 3.0518, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -28.682598114013672, |
| "rewards/margins": 4.982987403869629, |
| "rewards/rejected": -33.665584564208984, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.9250777495052305, |
| "grad_norm": 130.74419848686492, |
| "learning_rate": 1.3561482801337908e-08, |
| "logits/chosen": -1.0224024057388306, |
| "logits/rejected": -1.038309097290039, |
| "logps/chosen": -2.6347591876983643, |
| "logps/rejected": -3.282350540161133, |
| "loss": 2.557, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -26.347593307495117, |
| "rewards/margins": 6.475912570953369, |
| "rewards/rejected": -32.82350540161133, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.9273395532937517, |
| "grad_norm": 191.06201962614162, |
| "learning_rate": 1.2756358855332904e-08, |
| "logits/chosen": -1.1101237535476685, |
| "logits/rejected": -1.1116387844085693, |
| "logps/chosen": -2.9200122356414795, |
| "logps/rejected": -3.3554527759552, |
| "loss": 3.4354, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -29.200119018554688, |
| "rewards/margins": 4.354408264160156, |
| "rewards/rejected": -33.55453109741211, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9296013570822731, |
| "grad_norm": 134.29545605610724, |
| "learning_rate": 1.1975484296105154e-08, |
| "logits/chosen": -1.0667734146118164, |
| "logits/rejected": -1.0599286556243896, |
| "logps/chosen": -2.877082109451294, |
| "logps/rejected": -3.443552017211914, |
| "loss": 2.5938, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -28.770824432373047, |
| "rewards/margins": 5.6646928787231445, |
| "rewards/rejected": -34.435516357421875, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.9318631608707945, |
| "grad_norm": 174.84962964775417, |
| "learning_rate": 1.1218908022402374e-08, |
| "logits/chosen": -1.0731241703033447, |
| "logits/rejected": -1.0706862211227417, |
| "logps/chosen": -2.7748520374298096, |
| "logps/rejected": -3.357201337814331, |
| "loss": 2.8863, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -27.74852180480957, |
| "rewards/margins": 5.823493003845215, |
| "rewards/rejected": -33.57201385498047, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.9341249646593158, |
| "grad_norm": 146.28409435329323, |
| "learning_rate": 1.0486677411402079e-08, |
| "logits/chosen": -1.1381806135177612, |
| "logits/rejected": -1.133017897605896, |
| "logps/chosen": -2.9692389965057373, |
| "logps/rejected": -3.5767297744750977, |
| "loss": 3.1506, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -29.69239044189453, |
| "rewards/margins": 6.074907302856445, |
| "rewards/rejected": -35.767295837402344, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.9363867684478372, |
| "grad_norm": 159.6750432179662, |
| "learning_rate": 9.778838315744353e-09, |
| "logits/chosen": -1.1111695766448975, |
| "logits/rejected": -1.1009503602981567, |
| "logps/chosen": -2.921187162399292, |
| "logps/rejected": -3.49282169342041, |
| "loss": 2.4905, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -29.21187400817871, |
| "rewards/margins": 5.716343879699707, |
| "rewards/rejected": -34.92821502685547, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.9386485722363584, |
| "grad_norm": 163.33116990865057, |
| "learning_rate": 9.095435060660595e-09, |
| "logits/chosen": -1.0523741245269775, |
| "logits/rejected": -1.0503299236297607, |
| "logps/chosen": -2.904160499572754, |
| "logps/rejected": -3.385972499847412, |
| "loss": 3.2821, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -29.041603088378906, |
| "rewards/margins": 4.818119525909424, |
| "rewards/rejected": -33.85972595214844, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.9409103760248798, |
| "grad_norm": 172.67539274971105, |
| "learning_rate": 8.436510441197864e-09, |
| "logits/chosen": -1.0634400844573975, |
| "logits/rejected": -1.064286470413208, |
| "logps/chosen": -2.8385376930236816, |
| "logps/rejected": -3.3240842819213867, |
| "loss": 3.3055, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -28.385377883911133, |
| "rewards/margins": 4.855468273162842, |
| "rewards/rejected": -33.2408447265625, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.9431721798134012, |
| "grad_norm": 176.03201940803845, |
| "learning_rate": 7.802105719539076e-09, |
| "logits/chosen": -1.0832864046096802, |
| "logits/rejected": -1.094434380531311, |
| "logps/chosen": -2.95005464553833, |
| "logps/rejected": -3.435476541519165, |
| "loss": 3.4372, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -29.500545501708984, |
| "rewards/margins": 4.854221820831299, |
| "rewards/rejected": -34.354766845703125, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.9454339836019225, |
| "grad_norm": 140.1951037917173, |
| "learning_rate": 7.1922606224192e-09, |
| "logits/chosen": -1.1131281852722168, |
| "logits/rejected": -1.0919857025146484, |
| "logps/chosen": -2.904672861099243, |
| "logps/rejected": -3.4692986011505127, |
| "loss": 2.6629, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -29.046730041503906, |
| "rewards/margins": 5.646256923675537, |
| "rewards/rejected": -34.69298553466797, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.9476957873904439, |
| "grad_norm": 158.29519351333673, |
| "learning_rate": 6.6070133386372906e-09, |
| "logits/chosen": -1.109086513519287, |
| "logits/rejected": -1.100806713104248, |
| "logps/chosen": -2.969877243041992, |
| "logps/rejected": -3.402845859527588, |
| "loss": 3.1048, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -29.698768615722656, |
| "rewards/margins": 4.3296895027160645, |
| "rewards/rejected": -34.02845764160156, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.9499575911789653, |
| "grad_norm": 142.54144986382758, |
| "learning_rate": 6.046400516665384e-09, |
| "logits/chosen": -1.072888970375061, |
| "logits/rejected": -1.0681804418563843, |
| "logps/chosen": -2.8510801792144775, |
| "logps/rejected": -3.392737627029419, |
| "loss": 2.8594, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -28.51080322265625, |
| "rewards/margins": 5.416579246520996, |
| "rewards/rejected": -33.9273796081543, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9522193949674865, |
| "grad_norm": 167.23162652803958, |
| "learning_rate": 5.510457262353396e-09, |
| "logits/chosen": -1.0967669486999512, |
| "logits/rejected": -1.0917611122131348, |
| "logps/chosen": -2.810840606689453, |
| "logps/rejected": -3.335181474685669, |
| "loss": 2.8169, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -28.10840606689453, |
| "rewards/margins": 5.243409156799316, |
| "rewards/rejected": -33.35181427001953, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.9544811987560079, |
| "grad_norm": 147.43492313171294, |
| "learning_rate": 4.9992171367309265e-09, |
| "logits/chosen": -1.0767607688903809, |
| "logits/rejected": -1.062929391860962, |
| "logps/chosen": -2.683845043182373, |
| "logps/rejected": -3.255852222442627, |
| "loss": 2.5143, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -26.83845329284668, |
| "rewards/margins": 5.720067977905273, |
| "rewards/rejected": -32.55852127075195, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9567430025445293, |
| "grad_norm": 154.264825757035, |
| "learning_rate": 4.5127121539052955e-09, |
| "logits/chosen": -1.1299694776535034, |
| "logits/rejected": -1.1185402870178223, |
| "logps/chosen": -2.9707369804382324, |
| "logps/rejected": -3.532411575317383, |
| "loss": 2.8685, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -29.70737075805664, |
| "rewards/margins": 5.616747856140137, |
| "rewards/rejected": -35.32411575317383, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.9590048063330506, |
| "grad_norm": 140.7802126038013, |
| "learning_rate": 4.050972779057327e-09, |
| "logits/chosen": -1.0196279287338257, |
| "logits/rejected": -1.012292504310608, |
| "logps/chosen": -2.6845099925994873, |
| "logps/rejected": -3.226938009262085, |
| "loss": 3.0667, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -26.845102310180664, |
| "rewards/margins": 5.424278259277344, |
| "rewards/rejected": -32.269378662109375, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.961266610121572, |
| "grad_norm": 165.39326307614513, |
| "learning_rate": 3.6140279265330477e-09, |
| "logits/chosen": -1.066307783126831, |
| "logits/rejected": -1.0565274953842163, |
| "logps/chosen": -2.9191229343414307, |
| "logps/rejected": -3.4463143348693848, |
| "loss": 3.2063, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -29.191232681274414, |
| "rewards/margins": 5.271913051605225, |
| "rewards/rejected": -34.46314239501953, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9635284139100933, |
| "grad_norm": 173.22961492513315, |
| "learning_rate": 3.2019049580335853e-09, |
| "logits/chosen": -1.090395212173462, |
| "logits/rejected": -1.071869969367981, |
| "logps/chosen": -2.804586410522461, |
| "logps/rejected": -3.237943649291992, |
| "loss": 3.3588, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -28.045866012573242, |
| "rewards/margins": 4.3335723876953125, |
| "rewards/rejected": -32.37943649291992, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9657902176986146, |
| "grad_norm": 145.19510434395903, |
| "learning_rate": 2.814629680901337e-09, |
| "logits/chosen": -1.1221716403961182, |
| "logits/rejected": -1.1198689937591553, |
| "logps/chosen": -2.9432907104492188, |
| "logps/rejected": -3.467853546142578, |
| "loss": 2.7511, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -29.43290901184082, |
| "rewards/margins": 5.245627403259277, |
| "rewards/rejected": -34.67853546142578, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.968052021487136, |
| "grad_norm": 158.1903949721956, |
| "learning_rate": 2.4522263465041937e-09, |
| "logits/chosen": -1.0683190822601318, |
| "logits/rejected": -1.0715701580047607, |
| "logps/chosen": -2.909548044204712, |
| "logps/rejected": -3.454073667526245, |
| "loss": 2.6358, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -29.09547996520996, |
| "rewards/margins": 5.445255756378174, |
| "rewards/rejected": -34.540733337402344, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9703138252756573, |
| "grad_norm": 155.32881349911696, |
| "learning_rate": 2.114717648716713e-09, |
| "logits/chosen": -1.058469533920288, |
| "logits/rejected": -1.0475648641586304, |
| "logps/chosen": -2.8327419757843018, |
| "logps/rejected": -3.398642063140869, |
| "loss": 2.8237, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -28.327417373657227, |
| "rewards/margins": 5.659000396728516, |
| "rewards/rejected": -33.986419677734375, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9725756290641787, |
| "grad_norm": 151.9784262009179, |
| "learning_rate": 1.802124722499121e-09, |
| "logits/chosen": -1.082242488861084, |
| "logits/rejected": -1.0863622426986694, |
| "logps/chosen": -2.9595117568969727, |
| "logps/rejected": -3.589582920074463, |
| "loss": 2.8873, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -29.59511947631836, |
| "rewards/margins": 6.3007121086120605, |
| "rewards/rejected": -35.89583206176758, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9748374328527001, |
| "grad_norm": 145.2881020579959, |
| "learning_rate": 1.5144671425737499e-09, |
| "logits/chosen": -1.0939596891403198, |
| "logits/rejected": -1.0839340686798096, |
| "logps/chosen": -2.7935657501220703, |
| "logps/rejected": -3.3438332080841064, |
| "loss": 3.1053, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -27.93565559387207, |
| "rewards/margins": 5.502673149108887, |
| "rewards/rejected": -33.438331604003906, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9770992366412213, |
| "grad_norm": 142.85260765400977, |
| "learning_rate": 1.251762922199484e-09, |
| "logits/chosen": -1.0255465507507324, |
| "logits/rejected": -1.0279417037963867, |
| "logps/chosen": -2.9136672019958496, |
| "logps/rejected": -3.5454673767089844, |
| "loss": 2.2466, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -29.13667106628418, |
| "rewards/margins": 6.318003177642822, |
| "rewards/rejected": -35.454673767089844, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9793610404297427, |
| "grad_norm": 140.6251681228611, |
| "learning_rate": 1.0140285120433744e-09, |
| "logits/chosen": -1.0883697271347046, |
| "logits/rejected": -1.079869031906128, |
| "logps/chosen": -2.923574209213257, |
| "logps/rejected": -3.472318649291992, |
| "loss": 2.9511, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -29.235740661621094, |
| "rewards/margins": 5.48744535446167, |
| "rewards/rejected": -34.72319030761719, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.9816228442182641, |
| "grad_norm": 163.4887730874021, |
| "learning_rate": 8.012787991508396e-10, |
| "logits/chosen": -1.0686748027801514, |
| "logits/rejected": -1.0695778131484985, |
| "logps/chosen": -2.8537275791168213, |
| "logps/rejected": -3.4509644508361816, |
| "loss": 2.8354, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -28.537277221679688, |
| "rewards/margins": 5.972366809844971, |
| "rewards/rejected": -34.5096435546875, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9838846480067854, |
| "grad_norm": 159.38860256761092, |
| "learning_rate": 6.135271060133007e-10, |
| "logits/chosen": -1.08330500125885, |
| "logits/rejected": -1.059489130973816, |
| "logps/chosen": -3.001162528991699, |
| "logps/rejected": -3.6113970279693604, |
| "loss": 2.7838, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -30.01162338256836, |
| "rewards/margins": 6.102348327636719, |
| "rewards/rejected": -36.11397171020508, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9861464517953068, |
| "grad_norm": 153.9713059574121, |
| "learning_rate": 4.50785189733871e-10, |
| "logits/chosen": -1.0712958574295044, |
| "logits/rejected": -1.07225501537323, |
| "logps/chosen": -2.815187454223633, |
| "logps/rejected": -3.376993179321289, |
| "loss": 2.5357, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -28.151872634887695, |
| "rewards/margins": 5.61806058883667, |
| "rewards/rejected": -33.76993179321289, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.988408255583828, |
| "grad_norm": 177.28777588642907, |
| "learning_rate": 3.1306324129118935e-10, |
| "logits/chosen": -1.074324369430542, |
| "logits/rejected": -1.068164348602295, |
| "logps/chosen": -3.001028299331665, |
| "logps/rejected": -3.464707612991333, |
| "loss": 3.0057, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -30.010284423828125, |
| "rewards/margins": 4.636792182922363, |
| "rewards/rejected": -34.64707565307617, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.9906700593723494, |
| "grad_norm": 188.49091857123523, |
| "learning_rate": 2.003698849011748e-10, |
| "logits/chosen": -1.1169301271438599, |
| "logits/rejected": -1.1111122369766235, |
| "logps/chosen": -3.0199649333953857, |
| "logps/rejected": -3.4195683002471924, |
| "loss": 3.6605, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -30.199649810791016, |
| "rewards/margins": 3.996029853820801, |
| "rewards/rejected": -34.1956787109375, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.9929318631608708, |
| "grad_norm": 161.39259146870793, |
| "learning_rate": 1.1271217747714779e-10, |
| "logits/chosen": -1.1197105646133423, |
| "logits/rejected": -1.123986005783081, |
| "logps/chosen": -2.9404468536376953, |
| "logps/rejected": -3.3876237869262695, |
| "loss": 3.4166, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -29.404468536376953, |
| "rewards/margins": 4.471774101257324, |
| "rewards/rejected": -33.87623977661133, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.9951936669493922, |
| "grad_norm": 141.54707977256322, |
| "learning_rate": 5.0095608187739055e-11, |
| "logits/chosen": -1.0695970058441162, |
| "logits/rejected": -1.0684033632278442, |
| "logps/chosen": -2.7523577213287354, |
| "logps/rejected": -3.2570207118988037, |
| "loss": 2.9094, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -27.523576736450195, |
| "rewards/margins": 5.046632766723633, |
| "rewards/rejected": -32.57020950317383, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9974554707379135, |
| "grad_norm": 164.42489149506028, |
| "learning_rate": 1.2524098113209092e-11, |
| "logits/chosen": -1.1133098602294922, |
| "logits/rejected": -1.1184360980987549, |
| "logps/chosen": -3.0493438243865967, |
| "logps/rejected": -3.5772345066070557, |
| "loss": 2.9557, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -30.493436813354492, |
| "rewards/margins": 5.27890682220459, |
| "rewards/rejected": -35.77234649658203, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.9997172745264349, |
| "grad_norm": 132.5425140912548, |
| "learning_rate": 0.0, |
| "logits/chosen": -1.1178230047225952, |
| "logits/rejected": -1.1037944555282593, |
| "logps/chosen": -2.8530402183532715, |
| "logps/rejected": -3.433281660079956, |
| "loss": 2.6473, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -28.530399322509766, |
| "rewards/margins": 5.80241584777832, |
| "rewards/rejected": -34.33281707763672, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9997172745264349, |
| "eval_logits/chosen": -1.0814050436019897, |
| "eval_logits/rejected": -1.0757393836975098, |
| "eval_logps/chosen": -2.8983895778656006, |
| "eval_logps/rejected": -3.4255356788635254, |
| "eval_loss": 2.930593490600586, |
| "eval_rewards/accuracies": 0.7573529481887817, |
| "eval_rewards/chosen": -28.983896255493164, |
| "eval_rewards/margins": 5.271461486816406, |
| "eval_rewards/rejected": -34.25535583496094, |
| "eval_runtime": 100.5751, |
| "eval_samples_per_second": 29.62, |
| "eval_steps_per_second": 1.859, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9997172745264349, |
| "step": 442, |
| "total_flos": 134366991482880.0, |
| "train_loss": 3.5634871910060695, |
| "train_runtime": 7608.7168, |
| "train_samples_per_second": 7.438, |
| "train_steps_per_second": 0.058 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 442, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 134366991482880.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|