| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9997172745264349, |
| "eval_steps": 500, |
| "global_step": 442, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0022618037885213456, |
| "grad_norm": 82.20633944321844, |
| "learning_rate": 1.7777777777777777e-08, |
| "logits/chosen": -1.0218509435653687, |
| "logits/rejected": -0.9817585349082947, |
| "logps/chosen": -1.5688527822494507, |
| "logps/rejected": -1.7071683406829834, |
| "loss": 5.3243, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -15.688528060913086, |
| "rewards/margins": 1.3831559419631958, |
| "rewards/rejected": -17.071683883666992, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004523607577042691, |
| "grad_norm": 76.56269102716989, |
| "learning_rate": 3.5555555555555554e-08, |
| "logits/chosen": -1.0730209350585938, |
| "logits/rejected": -1.0666016340255737, |
| "logps/chosen": -1.6668750047683716, |
| "logps/rejected": -1.6219017505645752, |
| "loss": 6.0119, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -16.66874885559082, |
| "rewards/margins": -0.44973334670066833, |
| "rewards/rejected": -16.21901512145996, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006785411365564037, |
| "grad_norm": 80.7872495859453, |
| "learning_rate": 5.333333333333333e-08, |
| "logits/chosen": -1.0274062156677246, |
| "logits/rejected": -1.0491466522216797, |
| "logps/chosen": -1.5775692462921143, |
| "logps/rejected": -1.8128482103347778, |
| "loss": 5.5709, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -15.775691986083984, |
| "rewards/margins": 2.3527896404266357, |
| "rewards/rejected": -18.128482818603516, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.009047215154085382, |
| "grad_norm": 93.5787326834387, |
| "learning_rate": 7.111111111111111e-08, |
| "logits/chosen": -1.0509157180786133, |
| "logits/rejected": -1.028970718383789, |
| "logps/chosen": -1.6293585300445557, |
| "logps/rejected": -1.6357903480529785, |
| "loss": 5.7155, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -16.29358673095703, |
| "rewards/margins": 0.06431838870048523, |
| "rewards/rejected": -16.35790252685547, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01130901894260673, |
| "grad_norm": 65.5120890183917, |
| "learning_rate": 8.888888888888888e-08, |
| "logits/chosen": -1.0577445030212402, |
| "logits/rejected": -1.0238444805145264, |
| "logps/chosen": -1.5818334817886353, |
| "logps/rejected": -1.6137027740478516, |
| "loss": 5.376, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -15.818334579467773, |
| "rewards/margins": 0.3186935782432556, |
| "rewards/rejected": -16.137027740478516, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.013570822731128074, |
| "grad_norm": 88.31949377933185, |
| "learning_rate": 1.0666666666666666e-07, |
| "logits/chosen": -1.118450403213501, |
| "logits/rejected": -1.101908802986145, |
| "logps/chosen": -1.639064908027649, |
| "logps/rejected": -1.5965094566345215, |
| "loss": 6.1786, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": -16.390649795532227, |
| "rewards/margins": -0.4255555272102356, |
| "rewards/rejected": -15.965093612670898, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01583262651964942, |
| "grad_norm": 47.1722172267689, |
| "learning_rate": 1.2444444444444443e-07, |
| "logits/chosen": -1.0451990365982056, |
| "logits/rejected": -1.0316812992095947, |
| "logps/chosen": -1.493871808052063, |
| "logps/rejected": -1.7871618270874023, |
| "loss": 4.0395, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -14.93871784210205, |
| "rewards/margins": 2.932901620864868, |
| "rewards/rejected": -17.871618270874023, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.018094430308170765, |
| "grad_norm": 104.42110239584217, |
| "learning_rate": 1.4222222222222222e-07, |
| "logits/chosen": -1.0138366222381592, |
| "logits/rejected": -1.02739679813385, |
| "logps/chosen": -1.5772916078567505, |
| "logps/rejected": -1.6482716798782349, |
| "loss": 5.6936, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -15.772916793823242, |
| "rewards/margins": 0.7097985744476318, |
| "rewards/rejected": -16.482715606689453, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.020356234096692113, |
| "grad_norm": 68.67675316355087, |
| "learning_rate": 1.6e-07, |
| "logits/chosen": -1.0876085758209229, |
| "logits/rejected": -1.0700544118881226, |
| "logps/chosen": -1.523546576499939, |
| "logps/rejected": -1.7054587602615356, |
| "loss": 5.3456, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -15.235466957092285, |
| "rewards/margins": 1.819122552871704, |
| "rewards/rejected": -17.054588317871094, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02261803788521346, |
| "grad_norm": 94.5983360056872, |
| "learning_rate": 1.7777777777777776e-07, |
| "logits/chosen": -1.0795514583587646, |
| "logits/rejected": -1.0769304037094116, |
| "logps/chosen": -1.6664490699768066, |
| "logps/rejected": -1.6543259620666504, |
| "loss": 5.6149, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -16.66448974609375, |
| "rewards/margins": -0.12123118340969086, |
| "rewards/rejected": -16.543258666992188, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.024879841673734804, |
| "grad_norm": 88.34270236157406, |
| "learning_rate": 1.9555555555555555e-07, |
| "logits/chosen": -1.1123073101043701, |
| "logits/rejected": -1.1089990139007568, |
| "logps/chosen": -1.6528065204620361, |
| "logps/rejected": -1.7993955612182617, |
| "loss": 5.3148, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -16.528064727783203, |
| "rewards/margins": 1.4658915996551514, |
| "rewards/rejected": -17.993955612182617, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02714164546225615, |
| "grad_norm": 96.98065425868009, |
| "learning_rate": 2.133333333333333e-07, |
| "logits/chosen": -1.0383970737457275, |
| "logits/rejected": -1.0250272750854492, |
| "logps/chosen": -1.7430694103240967, |
| "logps/rejected": -1.8183331489562988, |
| "loss": 5.9778, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -17.430696487426758, |
| "rewards/margins": 0.7526383399963379, |
| "rewards/rejected": -18.183332443237305, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.029403449250777494, |
| "grad_norm": 198.0750276907948, |
| "learning_rate": 2.3111111111111107e-07, |
| "logits/chosen": -1.0285305976867676, |
| "logits/rejected": -0.9945288300514221, |
| "logps/chosen": -1.6881521940231323, |
| "logps/rejected": -1.614635705947876, |
| "loss": 5.9679, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -16.88152313232422, |
| "rewards/margins": -0.735164999961853, |
| "rewards/rejected": -16.1463565826416, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03166525303929884, |
| "grad_norm": 46.47950957800464, |
| "learning_rate": 2.4888888888888886e-07, |
| "logits/chosen": -1.040562391281128, |
| "logits/rejected": -1.0334664583206177, |
| "logps/chosen": -1.3813308477401733, |
| "logps/rejected": -1.5766117572784424, |
| "loss": 4.3908, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -13.813308715820312, |
| "rewards/margins": 1.9528083801269531, |
| "rewards/rejected": -15.766117095947266, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.033927056827820185, |
| "grad_norm": 69.33452676748747, |
| "learning_rate": 2.666666666666666e-07, |
| "logits/chosen": -1.0274150371551514, |
| "logits/rejected": -1.007089614868164, |
| "logps/chosen": -1.5167428255081177, |
| "logps/rejected": -1.6422500610351562, |
| "loss": 4.8089, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -15.16742992401123, |
| "rewards/margins": 1.255070686340332, |
| "rewards/rejected": -16.422500610351562, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03618886061634153, |
| "grad_norm": 72.5316696572694, |
| "learning_rate": 2.8444444444444443e-07, |
| "logits/chosen": -1.0711323022842407, |
| "logits/rejected": -1.0499267578125, |
| "logps/chosen": -1.5601518154144287, |
| "logps/rejected": -1.5947811603546143, |
| "loss": 5.6696, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -15.601516723632812, |
| "rewards/margins": 0.3462938070297241, |
| "rewards/rejected": -15.947811126708984, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.038450664404862875, |
| "grad_norm": 76.85732812220978, |
| "learning_rate": 3.022222222222222e-07, |
| "logits/chosen": -1.094878911972046, |
| "logits/rejected": -1.067216157913208, |
| "logps/chosen": -1.486512541770935, |
| "logps/rejected": -1.4571260213851929, |
| "loss": 6.1338, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -14.86512565612793, |
| "rewards/margins": -0.29386693239212036, |
| "rewards/rejected": -14.571259498596191, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04071246819338423, |
| "grad_norm": 92.61472209561433, |
| "learning_rate": 3.2e-07, |
| "logits/chosen": -1.0927892923355103, |
| "logits/rejected": -1.0815608501434326, |
| "logps/chosen": -1.887829303741455, |
| "logps/rejected": -1.9170186519622803, |
| "loss": 5.9261, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -18.878293991088867, |
| "rewards/margins": 0.29189303517341614, |
| "rewards/rejected": -19.17018699645996, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04297427198190557, |
| "grad_norm": 67.495547378849, |
| "learning_rate": 3.3777777777777777e-07, |
| "logits/chosen": -1.1162761449813843, |
| "logits/rejected": -1.10536527633667, |
| "logps/chosen": -1.6044241189956665, |
| "logps/rejected": -1.6690468788146973, |
| "loss": 5.0572, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -16.044240951538086, |
| "rewards/margins": 0.6462277173995972, |
| "rewards/rejected": -16.690467834472656, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04523607577042692, |
| "grad_norm": 46.08429182217552, |
| "learning_rate": 3.5555555555555553e-07, |
| "logits/chosen": -1.1088039875030518, |
| "logits/rejected": -1.091164231300354, |
| "logps/chosen": -1.4526840448379517, |
| "logps/rejected": -1.682389736175537, |
| "loss": 4.2789, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -14.526841163635254, |
| "rewards/margins": 2.297055721282959, |
| "rewards/rejected": -16.823896408081055, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04749787955894826, |
| "grad_norm": 81.54476641866651, |
| "learning_rate": 3.7333333333333334e-07, |
| "logits/chosen": -1.0202794075012207, |
| "logits/rejected": -1.0125457048416138, |
| "logps/chosen": -1.5033711194992065, |
| "logps/rejected": -1.515246868133545, |
| "loss": 5.2707, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -15.033713340759277, |
| "rewards/margins": 0.11875671148300171, |
| "rewards/rejected": -15.152469635009766, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04975968334746961, |
| "grad_norm": 77.44489528697936, |
| "learning_rate": 3.911111111111111e-07, |
| "logits/chosen": -1.0691096782684326, |
| "logits/rejected": -1.0430610179901123, |
| "logps/chosen": -1.5314964056015015, |
| "logps/rejected": -1.6080042123794556, |
| "loss": 4.5538, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -15.314962387084961, |
| "rewards/margins": 0.7650798559188843, |
| "rewards/rejected": -16.080041885375977, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05202148713599095, |
| "grad_norm": 72.65865482531491, |
| "learning_rate": 4.0888888888888886e-07, |
| "logits/chosen": -1.0519309043884277, |
| "logits/rejected": -1.0246856212615967, |
| "logps/chosen": -1.5432385206222534, |
| "logps/rejected": -1.5514321327209473, |
| "loss": 5.6642, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -15.432385444641113, |
| "rewards/margins": 0.08193567395210266, |
| "rewards/rejected": -15.514322280883789, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0542832909245123, |
| "grad_norm": 61.554723407219434, |
| "learning_rate": 4.266666666666666e-07, |
| "logits/chosen": -1.0789406299591064, |
| "logits/rejected": -1.0445177555084229, |
| "logps/chosen": -1.3729901313781738, |
| "logps/rejected": -1.4575581550598145, |
| "loss": 4.8554, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -13.729900360107422, |
| "rewards/margins": 0.8456806540489197, |
| "rewards/rejected": -14.575581550598145, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05654509471303364, |
| "grad_norm": 92.24212352163143, |
| "learning_rate": 4.4444444444444444e-07, |
| "logits/chosen": -1.0634639263153076, |
| "logits/rejected": -1.0437037944793701, |
| "logps/chosen": -1.6674749851226807, |
| "logps/rejected": -1.6732288599014282, |
| "loss": 5.5718, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -16.674747467041016, |
| "rewards/margins": 0.057538360357284546, |
| "rewards/rejected": -16.73228645324707, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05880689850155499, |
| "grad_norm": 82.27286171700929, |
| "learning_rate": 4.6222222222222214e-07, |
| "logits/chosen": -1.0726224184036255, |
| "logits/rejected": -1.042041301727295, |
| "logps/chosen": -1.6516973972320557, |
| "logps/rejected": -1.645316481590271, |
| "loss": 5.4824, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -16.5169734954834, |
| "rewards/margins": -0.06380730867385864, |
| "rewards/rejected": -16.453166961669922, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.061068702290076333, |
| "grad_norm": 71.37945583756039, |
| "learning_rate": 4.8e-07, |
| "logits/chosen": -1.0674571990966797, |
| "logits/rejected": -1.0658835172653198, |
| "logps/chosen": -1.6513671875, |
| "logps/rejected": -1.5918333530426025, |
| "loss": 5.9622, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -16.513669967651367, |
| "rewards/margins": -0.5953378677368164, |
| "rewards/rejected": -15.918333053588867, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.06333050607859768, |
| "grad_norm": 62.92038574422204, |
| "learning_rate": 4.977777777777777e-07, |
| "logits/chosen": -1.069338083267212, |
| "logits/rejected": -1.0540227890014648, |
| "logps/chosen": -1.5345242023468018, |
| "logps/rejected": -1.580329418182373, |
| "loss": 5.6612, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -15.345240592956543, |
| "rewards/margins": 0.45805394649505615, |
| "rewards/rejected": -15.803295135498047, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06559230986711903, |
| "grad_norm": 75.36368108465321, |
| "learning_rate": 5.155555555555556e-07, |
| "logits/chosen": -1.0797611474990845, |
| "logits/rejected": -1.0751529932022095, |
| "logps/chosen": -1.5848275423049927, |
| "logps/rejected": -1.5977083444595337, |
| "loss": 5.4094, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -15.848276138305664, |
| "rewards/margins": 0.1288076639175415, |
| "rewards/rejected": -15.977083206176758, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06785411365564037, |
| "grad_norm": 82.33258282824598, |
| "learning_rate": 5.333333333333332e-07, |
| "logits/chosen": -1.0502032041549683, |
| "logits/rejected": -1.0390020608901978, |
| "logps/chosen": -1.5757882595062256, |
| "logps/rejected": -1.5970449447631836, |
| "loss": 5.802, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -15.757882118225098, |
| "rewards/margins": 0.21256688237190247, |
| "rewards/rejected": -15.970449447631836, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07011591744416172, |
| "grad_norm": 79.89049476585055, |
| "learning_rate": 5.511111111111111e-07, |
| "logits/chosen": -1.0897246599197388, |
| "logits/rejected": -1.0813220739364624, |
| "logps/chosen": -1.556617021560669, |
| "logps/rejected": -1.5337543487548828, |
| "loss": 5.4678, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -15.566169738769531, |
| "rewards/margins": -0.2286262959241867, |
| "rewards/rejected": -15.337542533874512, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07237772123268306, |
| "grad_norm": 73.16917555470789, |
| "learning_rate": 5.688888888888889e-07, |
| "logits/chosen": -1.1192123889923096, |
| "logits/rejected": -1.109642744064331, |
| "logps/chosen": -1.4663976430892944, |
| "logps/rejected": -1.4524457454681396, |
| "loss": 5.5922, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -14.663975715637207, |
| "rewards/margins": -0.1395174264907837, |
| "rewards/rejected": -14.524457931518555, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07463952502120441, |
| "grad_norm": 62.031723886215296, |
| "learning_rate": 5.866666666666666e-07, |
| "logits/chosen": -1.0371233224868774, |
| "logits/rejected": -1.0098799467086792, |
| "logps/chosen": -1.4417638778686523, |
| "logps/rejected": -1.5921223163604736, |
| "loss": 4.6202, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -14.417638778686523, |
| "rewards/margins": 1.5035836696624756, |
| "rewards/rejected": -15.921221733093262, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07690132880972575, |
| "grad_norm": 51.2296834877244, |
| "learning_rate": 6.044444444444444e-07, |
| "logits/chosen": -1.0844844579696655, |
| "logits/rejected": -1.0409635305404663, |
| "logps/chosen": -1.3941082954406738, |
| "logps/rejected": -1.51826810836792, |
| "loss": 4.462, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -13.941082000732422, |
| "rewards/margins": 1.2415988445281982, |
| "rewards/rejected": -15.182682037353516, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0791631325982471, |
| "grad_norm": 96.14883019900049, |
| "learning_rate": 6.222222222222223e-07, |
| "logits/chosen": -1.1195001602172852, |
| "logits/rejected": -1.0811963081359863, |
| "logps/chosen": -1.518474817276001, |
| "logps/rejected": -1.636244297027588, |
| "loss": 4.8592, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -15.184747695922852, |
| "rewards/margins": 1.1776940822601318, |
| "rewards/rejected": -16.362442016601562, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.08142493638676845, |
| "grad_norm": 58.6930145213695, |
| "learning_rate": 6.4e-07, |
| "logits/chosen": -1.0167288780212402, |
| "logits/rejected": -1.0025566816329956, |
| "logps/chosen": -1.438754677772522, |
| "logps/rejected": -1.5819575786590576, |
| "loss": 4.6308, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -14.387547492980957, |
| "rewards/margins": 1.4320300817489624, |
| "rewards/rejected": -15.819576263427734, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08368674017528979, |
| "grad_norm": 68.52944989879505, |
| "learning_rate": 6.577777777777777e-07, |
| "logits/chosen": -1.0579607486724854, |
| "logits/rejected": -1.0365407466888428, |
| "logps/chosen": -1.3250023126602173, |
| "logps/rejected": -1.3734800815582275, |
| "loss": 4.806, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -13.250021934509277, |
| "rewards/margins": 0.4847772419452667, |
| "rewards/rejected": -13.734800338745117, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08594854396381114, |
| "grad_norm": 77.13280631717205, |
| "learning_rate": 6.755555555555555e-07, |
| "logits/chosen": -1.1369915008544922, |
| "logits/rejected": -1.1310396194458008, |
| "logps/chosen": -1.4429665803909302, |
| "logps/rejected": -1.502832055091858, |
| "loss": 4.7471, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -14.429665565490723, |
| "rewards/margins": 0.5986539125442505, |
| "rewards/rejected": -15.0283203125, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08821034775233248, |
| "grad_norm": 72.97486900799309, |
| "learning_rate": 6.933333333333333e-07, |
| "logits/chosen": -1.1206347942352295, |
| "logits/rejected": -1.1240882873535156, |
| "logps/chosen": -1.4795533418655396, |
| "logps/rejected": -1.5203282833099365, |
| "loss": 4.953, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -14.7955322265625, |
| "rewards/margins": 0.40774989128112793, |
| "rewards/rejected": -15.20328140258789, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.09047215154085383, |
| "grad_norm": 52.88449320217243, |
| "learning_rate": 7.111111111111111e-07, |
| "logits/chosen": -1.0798068046569824, |
| "logits/rejected": -1.043776512145996, |
| "logps/chosen": -1.3615009784698486, |
| "logps/rejected": -1.5376112461090088, |
| "loss": 3.9307, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -13.615011215209961, |
| "rewards/margins": 1.761101245880127, |
| "rewards/rejected": -15.37611198425293, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09273395532937517, |
| "grad_norm": 44.35065713868699, |
| "learning_rate": 7.288888888888888e-07, |
| "logits/chosen": -1.1410603523254395, |
| "logits/rejected": -1.107545256614685, |
| "logps/chosen": -1.3647446632385254, |
| "logps/rejected": -1.4958362579345703, |
| "loss": 4.5188, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -13.647444725036621, |
| "rewards/margins": 1.3109173774719238, |
| "rewards/rejected": -14.95836353302002, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.09499575911789652, |
| "grad_norm": 62.52516819672327, |
| "learning_rate": 7.466666666666667e-07, |
| "logits/chosen": -1.024010419845581, |
| "logits/rejected": -0.980333685874939, |
| "logps/chosen": -1.4068825244903564, |
| "logps/rejected": -1.5373163223266602, |
| "loss": 4.6958, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -14.068826675415039, |
| "rewards/margins": 1.3043370246887207, |
| "rewards/rejected": -15.373163223266602, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09725756290641786, |
| "grad_norm": 75.14472900675813, |
| "learning_rate": 7.644444444444444e-07, |
| "logits/chosen": -1.0994362831115723, |
| "logits/rejected": -1.0756360292434692, |
| "logps/chosen": -1.3996585607528687, |
| "logps/rejected": -1.4580457210540771, |
| "loss": 4.6856, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -13.996583938598633, |
| "rewards/margins": 0.5838702917098999, |
| "rewards/rejected": -14.58045482635498, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09951936669493922, |
| "grad_norm": 54.0175172423554, |
| "learning_rate": 7.822222222222222e-07, |
| "logits/chosen": -1.0326025485992432, |
| "logits/rejected": -1.0035631656646729, |
| "logps/chosen": -1.2723063230514526, |
| "logps/rejected": -1.3909995555877686, |
| "loss": 4.3197, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -12.723064422607422, |
| "rewards/margins": 1.1869314908981323, |
| "rewards/rejected": -13.909995079040527, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.10178117048346055, |
| "grad_norm": 50.25629417898048, |
| "learning_rate": 8e-07, |
| "logits/chosen": -1.0961617231369019, |
| "logits/rejected": -1.079810619354248, |
| "logps/chosen": -1.3260252475738525, |
| "logps/rejected": -1.4084868431091309, |
| "loss": 4.662, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -13.260252952575684, |
| "rewards/margins": 0.8246161937713623, |
| "rewards/rejected": -14.084867477416992, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1040429742719819, |
| "grad_norm": 96.18305334292586, |
| "learning_rate": 7.999874759018868e-07, |
| "logits/chosen": -1.117138147354126, |
| "logits/rejected": -1.0853725671768188, |
| "logps/chosen": -1.529714584350586, |
| "logps/rejected": -1.6374049186706543, |
| "loss": 4.7187, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -15.29714584350586, |
| "rewards/margins": 1.076902151107788, |
| "rewards/rejected": -16.374048233032227, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10630477806050326, |
| "grad_norm": 50.552602097177946, |
| "learning_rate": 7.999499043918123e-07, |
| "logits/chosen": -1.1253491640090942, |
| "logits/rejected": -1.1363474130630493, |
| "logps/chosen": -1.4145984649658203, |
| "logps/rejected": -1.4883978366851807, |
| "loss": 4.8257, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -14.14598274230957, |
| "rewards/margins": 0.7379940748214722, |
| "rewards/rejected": -14.883977890014648, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1085665818490246, |
| "grad_norm": 56.426748642311836, |
| "learning_rate": 7.998872878225228e-07, |
| "logits/chosen": -1.0526834726333618, |
| "logits/rejected": -1.043718695640564, |
| "logps/chosen": -1.4263412952423096, |
| "logps/rejected": -1.5427813529968262, |
| "loss": 4.4355, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -14.263413429260254, |
| "rewards/margins": 1.1644010543823242, |
| "rewards/rejected": -15.427813529968262, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.11082838563754595, |
| "grad_norm": 55.22325931071655, |
| "learning_rate": 7.997996301150987e-07, |
| "logits/chosen": -1.042626142501831, |
| "logits/rejected": -1.0421124696731567, |
| "logps/chosen": -1.3600575923919678, |
| "logps/rejected": -1.4976643323898315, |
| "loss": 4.2471, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -13.600576400756836, |
| "rewards/margins": 1.3760665655136108, |
| "rewards/rejected": -14.976642608642578, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11309018942606729, |
| "grad_norm": 63.88977382203578, |
| "learning_rate": 7.996869367587088e-07, |
| "logits/chosen": -1.0255780220031738, |
| "logits/rejected": -1.0129433870315552, |
| "logps/chosen": -1.368567943572998, |
| "logps/rejected": -1.4815213680267334, |
| "loss": 4.5417, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -13.685680389404297, |
| "rewards/margins": 1.1295346021652222, |
| "rewards/rejected": -14.815214157104492, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11535199321458864, |
| "grad_norm": 49.558562568767535, |
| "learning_rate": 7.99549214810266e-07, |
| "logits/chosen": -1.0174915790557861, |
| "logits/rejected": -1.01373291015625, |
| "logps/chosen": -1.417051911354065, |
| "logps/rejected": -1.5000643730163574, |
| "loss": 4.6103, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -14.170519828796387, |
| "rewards/margins": 0.8301246166229248, |
| "rewards/rejected": -15.00064468383789, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.11761379700310998, |
| "grad_norm": 59.766954657505124, |
| "learning_rate": 7.993864728939867e-07, |
| "logits/chosen": -1.0445855855941772, |
| "logits/rejected": -1.002925157546997, |
| "logps/chosen": -1.3486438989639282, |
| "logps/rejected": -1.462002158164978, |
| "loss": 4.7086, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -13.486440658569336, |
| "rewards/margins": 1.1335809230804443, |
| "rewards/rejected": -14.620019912719727, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11987560079163133, |
| "grad_norm": 61.66347742848864, |
| "learning_rate": 7.991987212008491e-07, |
| "logits/chosen": -1.0562191009521484, |
| "logits/rejected": -1.0350170135498047, |
| "logps/chosen": -1.4400745630264282, |
| "logps/rejected": -1.6580910682678223, |
| "loss": 4.1834, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -14.400745391845703, |
| "rewards/margins": 2.1801652908325195, |
| "rewards/rejected": -16.580909729003906, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.12213740458015267, |
| "grad_norm": 65.57057337291207, |
| "learning_rate": 7.989859714879565e-07, |
| "logits/chosen": -1.1002763509750366, |
| "logits/rejected": -1.066996693611145, |
| "logps/chosen": -1.4376161098480225, |
| "logps/rejected": -1.5005946159362793, |
| "loss": 4.9236, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -14.376161575317383, |
| "rewards/margins": 0.6297858357429504, |
| "rewards/rejected": -15.00594711303711, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.12439920836867402, |
| "grad_norm": 55.513799760876196, |
| "learning_rate": 7.987482370778005e-07, |
| "logits/chosen": -1.0518475770950317, |
| "logits/rejected": -1.027663230895996, |
| "logps/chosen": -1.4021294116973877, |
| "logps/rejected": -1.530908465385437, |
| "loss": 4.5889, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -14.021293640136719, |
| "rewards/margins": 1.287792444229126, |
| "rewards/rejected": -15.309085845947266, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.12666101215719536, |
| "grad_norm": 75.90464831673606, |
| "learning_rate": 7.984855328574262e-07, |
| "logits/chosen": -0.9373297691345215, |
| "logits/rejected": -0.9265519380569458, |
| "logps/chosen": -1.3669638633728027, |
| "logps/rejected": -1.4640941619873047, |
| "loss": 4.55, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -13.669638633728027, |
| "rewards/margins": 0.9713033437728882, |
| "rewards/rejected": -14.640941619873047, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1289228159457167, |
| "grad_norm": 68.96445829802559, |
| "learning_rate": 7.981978752775009e-07, |
| "logits/chosen": -0.9993575811386108, |
| "logits/rejected": -0.9976133704185486, |
| "logps/chosen": -1.397598385810852, |
| "logps/rejected": -1.541737675666809, |
| "loss": 4.59, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -13.975984573364258, |
| "rewards/margins": 1.4413925409317017, |
| "rewards/rejected": -15.417377471923828, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.13118461973423806, |
| "grad_norm": 80.54101053736677, |
| "learning_rate": 7.978852823512833e-07, |
| "logits/chosen": -1.0546932220458984, |
| "logits/rejected": -1.0237070322036743, |
| "logps/chosen": -1.5197957754135132, |
| "logps/rejected": -1.6809276342391968, |
| "loss": 4.5398, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -15.197957992553711, |
| "rewards/margins": 1.6113194227218628, |
| "rewards/rejected": -16.809276580810547, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1334464235227594, |
| "grad_norm": 92.89373223689378, |
| "learning_rate": 7.975477736534957e-07, |
| "logits/chosen": -1.0505645275115967, |
| "logits/rejected": -1.0457043647766113, |
| "logps/chosen": -1.45807945728302, |
| "logps/rejected": -1.6856966018676758, |
| "loss": 4.3795, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -14.580793380737305, |
| "rewards/margins": 2.276172637939453, |
| "rewards/rejected": -16.856966018676758, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.13570822731128074, |
| "grad_norm": 86.59524139517637, |
| "learning_rate": 7.971853703190986e-07, |
| "logits/chosen": -1.0514953136444092, |
| "logits/rejected": -1.0330663919448853, |
| "logps/chosen": -1.4829269647598267, |
| "logps/rejected": -1.656957983970642, |
| "loss": 4.4199, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -14.829270362854004, |
| "rewards/margins": 1.740309715270996, |
| "rewards/rejected": -16.569580078125, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1379700310998021, |
| "grad_norm": 58.04746598511771, |
| "learning_rate": 7.967980950419664e-07, |
| "logits/chosen": -0.9681941270828247, |
| "logits/rejected": -0.9625906348228455, |
| "logps/chosen": -1.4017956256866455, |
| "logps/rejected": -1.5850591659545898, |
| "loss": 4.1725, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -14.01795768737793, |
| "rewards/margins": 1.8326325416564941, |
| "rewards/rejected": -15.850589752197266, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.14023183488832344, |
| "grad_norm": 75.43808245271408, |
| "learning_rate": 7.963859720734669e-07, |
| "logits/chosen": -1.0596580505371094, |
| "logits/rejected": -1.0476895570755005, |
| "logps/chosen": -1.3745746612548828, |
| "logps/rejected": -1.5092250108718872, |
| "loss": 4.634, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -13.745745658874512, |
| "rewards/margins": 1.3465046882629395, |
| "rewards/rejected": -15.09225082397461, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.14249363867684478, |
| "grad_norm": 52.97237108595383, |
| "learning_rate": 7.959490272209427e-07, |
| "logits/chosen": -1.0601532459259033, |
| "logits/rejected": -1.01606023311615, |
| "logps/chosen": -1.3559281826019287, |
| "logps/rejected": -1.6258801221847534, |
| "loss": 3.7898, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -13.559282302856445, |
| "rewards/margins": 2.6995184421539307, |
| "rewards/rejected": -16.258800506591797, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.14475544246536612, |
| "grad_norm": 52.737070381425056, |
| "learning_rate": 7.954872878460946e-07, |
| "logits/chosen": -1.0653126239776611, |
| "logits/rejected": -1.0284615755081177, |
| "logps/chosen": -1.3986772298812866, |
| "logps/rejected": -1.6275815963745117, |
| "loss": 3.9627, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -13.986770629882812, |
| "rewards/margins": 2.2890453338623047, |
| "rewards/rejected": -16.275815963745117, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.14701724625388748, |
| "grad_norm": 64.78973484996234, |
| "learning_rate": 7.950007828632691e-07, |
| "logits/chosen": -1.0072953701019287, |
| "logits/rejected": -1.0003857612609863, |
| "logps/chosen": -1.4827585220336914, |
| "logps/rejected": -1.7651684284210205, |
| "loss": 4.0168, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -14.82758617401123, |
| "rewards/margins": 2.8241007328033447, |
| "rewards/rejected": -17.65168571472168, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.14927905004240882, |
| "grad_norm": 48.16068590765694, |
| "learning_rate": 7.944895427376465e-07, |
| "logits/chosen": -1.0329233407974243, |
| "logits/rejected": -1.0038236379623413, |
| "logps/chosen": -1.4716813564300537, |
| "logps/rejected": -1.7506184577941895, |
| "loss": 4.0156, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -14.716813087463379, |
| "rewards/margins": 2.789372682571411, |
| "rewards/rejected": -17.506183624267578, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.15154085383093016, |
| "grad_norm": 52.28364686992911, |
| "learning_rate": 7.939535994833345e-07, |
| "logits/chosen": -0.9820632338523865, |
| "logits/rejected": -0.9789884686470032, |
| "logps/chosen": -1.3938853740692139, |
| "logps/rejected": -1.6392000913619995, |
| "loss": 4.1279, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -13.938854217529297, |
| "rewards/margins": 2.4531469345092773, |
| "rewards/rejected": -16.392000198364258, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1538026576194515, |
| "grad_norm": 59.96935116096323, |
| "learning_rate": 7.933929866613628e-07, |
| "logits/chosen": -1.002638339996338, |
| "logits/rejected": -1.0027940273284912, |
| "logps/chosen": -1.450176477432251, |
| "logps/rejected": -1.5936009883880615, |
| "loss": 4.3626, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -14.501765251159668, |
| "rewards/margins": 1.4342446327209473, |
| "rewards/rejected": -15.936010360717773, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.15606446140797287, |
| "grad_norm": 69.75195635586249, |
| "learning_rate": 7.928077393775808e-07, |
| "logits/chosen": -0.9776244163513184, |
| "logits/rejected": -0.9800903797149658, |
| "logps/chosen": -1.4521443843841553, |
| "logps/rejected": -1.7846593856811523, |
| "loss": 3.586, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -14.521443367004395, |
| "rewards/margins": 3.3251500129699707, |
| "rewards/rejected": -17.846593856811523, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.1583262651964942, |
| "grad_norm": 77.74229614088146, |
| "learning_rate": 7.921978942804609e-07, |
| "logits/chosen": -0.9629479050636292, |
| "logits/rejected": -0.9628820419311523, |
| "logps/chosen": -1.4668632745742798, |
| "logps/rejected": -1.7327969074249268, |
| "loss": 3.7939, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -14.668632507324219, |
| "rewards/margins": 2.659337282180786, |
| "rewards/rejected": -17.32796859741211, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.16058806898501554, |
| "grad_norm": 73.23733622048438, |
| "learning_rate": 7.915634895588021e-07, |
| "logits/chosen": -1.003068447113037, |
| "logits/rejected": -0.9916574954986572, |
| "logps/chosen": -1.5601112842559814, |
| "logps/rejected": -1.733612060546875, |
| "loss": 4.3674, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -15.601114273071289, |
| "rewards/margins": 1.735007405281067, |
| "rewards/rejected": -17.33612060546875, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1628498727735369, |
| "grad_norm": 75.55585019277092, |
| "learning_rate": 7.909045649393394e-07, |
| "logits/chosen": -1.040993332862854, |
| "logits/rejected": -1.0444133281707764, |
| "logps/chosen": -1.4949562549591064, |
| "logps/rejected": -1.6046648025512695, |
| "loss": 4.5456, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -14.949562072753906, |
| "rewards/margins": 1.0970847606658936, |
| "rewards/rejected": -16.046646118164062, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.16511167656205825, |
| "grad_norm": 65.91136959416728, |
| "learning_rate": 7.902211616842556e-07, |
| "logits/chosen": -1.0223642587661743, |
| "logits/rejected": -1.014294147491455, |
| "logps/chosen": -1.514482021331787, |
| "logps/rejected": -1.7560579776763916, |
| "loss": 4.1924, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -15.144821166992188, |
| "rewards/margins": 2.4157588481903076, |
| "rewards/rejected": -17.56058120727539, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.16737348035057958, |
| "grad_norm": 68.14881598964276, |
| "learning_rate": 7.89513322588598e-07, |
| "logits/chosen": -1.006181001663208, |
| "logits/rejected": -0.9941987991333008, |
| "logps/chosen": -1.5036894083023071, |
| "logps/rejected": -1.683767318725586, |
| "loss": 4.0427, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -15.036893844604492, |
| "rewards/margins": 1.800781011581421, |
| "rewards/rejected": -16.83767318725586, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.16963528413910092, |
| "grad_norm": 54.7050909412455, |
| "learning_rate": 7.887810919775976e-07, |
| "logits/chosen": -0.9531492590904236, |
| "logits/rejected": -0.939092755317688, |
| "logps/chosen": -1.5378425121307373, |
| "logps/rejected": -1.7426013946533203, |
| "loss": 4.1258, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -15.378423690795898, |
| "rewards/margins": 2.0475881099700928, |
| "rewards/rejected": -17.42601203918457, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1718970879276223, |
| "grad_norm": 43.16218753621578, |
| "learning_rate": 7.880245157038949e-07, |
| "logits/chosen": -1.0244200229644775, |
| "logits/rejected": -0.9850828647613525, |
| "logps/chosen": -1.5913515090942383, |
| "logps/rejected": -1.7808334827423096, |
| "loss": 4.1766, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -15.913515090942383, |
| "rewards/margins": 1.8948214054107666, |
| "rewards/rejected": -17.80833625793457, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.17415889171614363, |
| "grad_norm": 68.24029437702886, |
| "learning_rate": 7.872436411446671e-07, |
| "logits/chosen": -1.0308367013931274, |
| "logits/rejected": -1.053020715713501, |
| "logps/chosen": -1.578061819076538, |
| "logps/rejected": -1.7366865873336792, |
| "loss": 4.4062, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -15.780617713928223, |
| "rewards/margins": 1.5862494707107544, |
| "rewards/rejected": -17.366867065429688, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.17642069550466496, |
| "grad_norm": 47.250623759092406, |
| "learning_rate": 7.86438517198662e-07, |
| "logits/chosen": -0.981247067451477, |
| "logits/rejected": -0.9654079675674438, |
| "logps/chosen": -1.4891939163208008, |
| "logps/rejected": -1.692526936531067, |
| "loss": 4.1759, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -14.89194107055664, |
| "rewards/margins": 2.0333304405212402, |
| "rewards/rejected": -16.925270080566406, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1786824992931863, |
| "grad_norm": 43.21600729867601, |
| "learning_rate": 7.856091942831366e-07, |
| "logits/chosen": -0.9647431373596191, |
| "logits/rejected": -0.9658678770065308, |
| "logps/chosen": -1.4802823066711426, |
| "logps/rejected": -1.6921435594558716, |
| "loss": 4.3292, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -14.802824020385742, |
| "rewards/margins": 2.1186134815216064, |
| "rewards/rejected": -16.921438217163086, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.18094430308170767, |
| "grad_norm": 60.91821720218818, |
| "learning_rate": 7.847557243306982e-07, |
| "logits/chosen": -1.0512564182281494, |
| "logits/rejected": -1.0319970846176147, |
| "logps/chosen": -1.5609538555145264, |
| "logps/rejected": -1.7701412439346313, |
| "loss": 4.0368, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -15.609538078308105, |
| "rewards/margins": 2.0918755531311035, |
| "rewards/rejected": -17.701412200927734, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.183206106870229, |
| "grad_norm": 63.54121135584775, |
| "learning_rate": 7.838781607860541e-07, |
| "logits/chosen": -1.0236457586288452, |
| "logits/rejected": -1.0056768655776978, |
| "logps/chosen": -1.5709818601608276, |
| "logps/rejected": -1.7774336338043213, |
| "loss": 3.6937, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -15.709818840026855, |
| "rewards/margins": 2.0645182132720947, |
| "rewards/rejected": -17.774335861206055, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.18546791065875035, |
| "grad_norm": 52.43422564611808, |
| "learning_rate": 7.82976558602664e-07, |
| "logits/chosen": -1.0549571514129639, |
| "logits/rejected": -1.0641645193099976, |
| "logps/chosen": -1.5252659320831299, |
| "logps/rejected": -1.6951864957809448, |
| "loss": 4.1141, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -15.252659797668457, |
| "rewards/margins": 1.6992065906524658, |
| "rewards/rejected": -16.951866149902344, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.1877297144472717, |
| "grad_norm": 71.24127939099962, |
| "learning_rate": 7.820509742392988e-07, |
| "logits/chosen": -1.0248099565505981, |
| "logits/rejected": -1.006967306137085, |
| "logps/chosen": -1.6189182996749878, |
| "logps/rejected": -1.7908198833465576, |
| "loss": 4.1344, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -16.18918228149414, |
| "rewards/margins": 1.7190148830413818, |
| "rewards/rejected": -17.9081974029541, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.18999151823579305, |
| "grad_norm": 101.94936409837923, |
| "learning_rate": 7.811014656565054e-07, |
| "logits/chosen": -1.0417755842208862, |
| "logits/rejected": -1.0172818899154663, |
| "logps/chosen": -1.5213223695755005, |
| "logps/rejected": -1.8359572887420654, |
| "loss": 3.5607, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -15.213224411010742, |
| "rewards/margins": 3.1463489532470703, |
| "rewards/rejected": -18.359575271606445, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1922533220243144, |
| "grad_norm": 55.87779805654747, |
| "learning_rate": 7.801280923129773e-07, |
| "logits/chosen": -1.0234826803207397, |
| "logits/rejected": -1.0111762285232544, |
| "logps/chosen": -1.5780723094940186, |
| "logps/rejected": -1.7445272207260132, |
| "loss": 4.4554, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -15.780723571777344, |
| "rewards/margins": 1.6645516157150269, |
| "rewards/rejected": -17.44527244567871, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.19451512581283573, |
| "grad_norm": 62.73905684172959, |
| "learning_rate": 7.791309151618305e-07, |
| "logits/chosen": -1.0294363498687744, |
| "logits/rejected": -1.026973843574524, |
| "logps/chosen": -1.68962824344635, |
| "logps/rejected": -1.888668417930603, |
| "loss": 4.1944, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -16.89628028869629, |
| "rewards/margins": 1.9904035329818726, |
| "rewards/rejected": -18.88668441772461, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1967769296013571, |
| "grad_norm": 50.00488780508724, |
| "learning_rate": 7.781099966467874e-07, |
| "logits/chosen": -1.05634343624115, |
| "logits/rejected": -1.0430347919464111, |
| "logps/chosen": -1.5399338006973267, |
| "logps/rejected": -1.6823569536209106, |
| "loss": 3.99, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -15.399337768554688, |
| "rewards/margins": 1.4242321252822876, |
| "rewards/rejected": -16.82356834411621, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.19903873338987843, |
| "grad_norm": 76.23850174230617, |
| "learning_rate": 7.770654006982664e-07, |
| "logits/chosen": -1.033555269241333, |
| "logits/rejected": -0.9970846176147461, |
| "logps/chosen": -1.7175520658493042, |
| "logps/rejected": -1.9172134399414062, |
| "loss": 4.364, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -17.175518035888672, |
| "rewards/margins": 1.9966144561767578, |
| "rewards/rejected": -19.172136306762695, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.20130053717839977, |
| "grad_norm": 60.90592219891859, |
| "learning_rate": 7.759971927293781e-07, |
| "logits/chosen": -1.0577505826950073, |
| "logits/rejected": -1.0415252447128296, |
| "logps/chosen": -1.566547155380249, |
| "logps/rejected": -1.7689849138259888, |
| "loss": 3.9552, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -15.665471076965332, |
| "rewards/margins": 2.0243773460388184, |
| "rewards/rejected": -17.689849853515625, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2035623409669211, |
| "grad_norm": 58.25113147294504, |
| "learning_rate": 7.749054396318297e-07, |
| "logits/chosen": -1.0256553888320923, |
| "logits/rejected": -1.0106987953186035, |
| "logps/chosen": -1.6812288761138916, |
| "logps/rejected": -1.8377100229263306, |
| "loss": 4.4271, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -16.81229019165039, |
| "rewards/margins": 1.564809799194336, |
| "rewards/rejected": -18.377098083496094, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.20582414475544247, |
| "grad_norm": 77.85759380281476, |
| "learning_rate": 7.737902097717356e-07, |
| "logits/chosen": -0.990077793598175, |
| "logits/rejected": -0.9915316700935364, |
| "logps/chosen": -1.572546124458313, |
| "logps/rejected": -1.8305258750915527, |
| "loss": 4.1888, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -15.725460052490234, |
| "rewards/margins": 2.5797994136810303, |
| "rewards/rejected": -18.305259704589844, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2080859485439638, |
| "grad_norm": 63.82605757099174, |
| "learning_rate": 7.726515729853367e-07, |
| "logits/chosen": -0.989700198173523, |
| "logits/rejected": -0.9903428554534912, |
| "logps/chosen": -1.5940182209014893, |
| "logps/rejected": -1.7212352752685547, |
| "loss": 4.5859, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -15.940183639526367, |
| "rewards/margins": 1.2721664905548096, |
| "rewards/rejected": -17.21234893798828, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.21034775233248515, |
| "grad_norm": 65.72932573481259, |
| "learning_rate": 7.714896005746272e-07, |
| "logits/chosen": -1.024734616279602, |
| "logits/rejected": -1.0021594762802124, |
| "logps/chosen": -1.6271653175354004, |
| "logps/rejected": -1.9118987321853638, |
| "loss": 3.5493, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -16.271652221679688, |
| "rewards/margins": 2.8473360538482666, |
| "rewards/rejected": -19.118988037109375, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.21260955612100652, |
| "grad_norm": 82.97110656932769, |
| "learning_rate": 7.703043653028896e-07, |
| "logits/chosen": -1.052355170249939, |
| "logits/rejected": -1.0488598346710205, |
| "logps/chosen": -1.840329647064209, |
| "logps/rejected": -2.031944990158081, |
| "loss": 4.325, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -18.403295516967773, |
| "rewards/margins": 1.9161533117294312, |
| "rewards/rejected": -20.31945037841797, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.21487135990952785, |
| "grad_norm": 101.34165470117001, |
| "learning_rate": 7.690959413901379e-07, |
| "logits/chosen": -1.0169719457626343, |
| "logits/rejected": -0.9872531890869141, |
| "logps/chosen": -1.688145399093628, |
| "logps/rejected": -1.886854648590088, |
| "loss": 4.2579, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -16.881454467773438, |
| "rewards/margins": 1.9870920181274414, |
| "rewards/rejected": -18.868545532226562, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2171331636980492, |
| "grad_norm": 61.64172871920425, |
| "learning_rate": 7.678644045084704e-07, |
| "logits/chosen": -0.9576135873794556, |
| "logits/rejected": -0.9784174561500549, |
| "logps/chosen": -1.5893274545669556, |
| "logps/rejected": -1.8206167221069336, |
| "loss": 4.2059, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -15.89327621459961, |
| "rewards/margins": 2.312891721725464, |
| "rewards/rejected": -18.206167221069336, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.21939496748657053, |
| "grad_norm": 57.351716171177195, |
| "learning_rate": 7.666098317773308e-07, |
| "logits/chosen": -1.0138105154037476, |
| "logits/rejected": -1.0110279321670532, |
| "logps/chosen": -1.73637855052948, |
| "logps/rejected": -1.9551489353179932, |
| "loss": 3.7635, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -17.363784790039062, |
| "rewards/margins": 2.187703847885132, |
| "rewards/rejected": -19.551488876342773, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2216567712750919, |
| "grad_norm": 55.90174304015073, |
| "learning_rate": 7.653323017586789e-07, |
| "logits/chosen": -1.036927342414856, |
| "logits/rejected": -1.022722840309143, |
| "logps/chosen": -1.5883724689483643, |
| "logps/rejected": -1.7606651782989502, |
| "loss": 4.022, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -15.8837251663208, |
| "rewards/margins": 1.7229257822036743, |
| "rewards/rejected": -17.60664939880371, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.22391857506361323, |
| "grad_norm": 66.1645588014719, |
| "learning_rate": 7.640318944520711e-07, |
| "logits/chosen": -1.0245897769927979, |
| "logits/rejected": -1.0293288230895996, |
| "logps/chosen": -1.7076942920684814, |
| "logps/rejected": -1.9161455631256104, |
| "loss": 4.0482, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -17.07694435119629, |
| "rewards/margins": 2.0845108032226562, |
| "rewards/rejected": -19.161455154418945, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.22618037885213457, |
| "grad_norm": 61.755723223908106, |
| "learning_rate": 7.627086912896511e-07, |
| "logits/chosen": -0.9213237762451172, |
| "logits/rejected": -0.9413522481918335, |
| "logps/chosen": -1.6155421733856201, |
| "logps/rejected": -1.8583589792251587, |
| "loss": 3.6863, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -16.15542221069336, |
| "rewards/margins": 2.4281704425811768, |
| "rewards/rejected": -18.583589553833008, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2284421826406559, |
| "grad_norm": 52.658945282281614, |
| "learning_rate": 7.613627751310499e-07, |
| "logits/chosen": -1.0586283206939697, |
| "logits/rejected": -1.0511394739151, |
| "logps/chosen": -1.6844897270202637, |
| "logps/rejected": -1.8975579738616943, |
| "loss": 3.6614, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -16.84489631652832, |
| "rewards/margins": 2.1306824684143066, |
| "rewards/rejected": -18.97557830810547, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.23070398642917728, |
| "grad_norm": 63.56535705000215, |
| "learning_rate": 7.599942302581977e-07, |
| "logits/chosen": -1.026753544807434, |
| "logits/rejected": -1.029058575630188, |
| "logps/chosen": -1.7198643684387207, |
| "logps/rejected": -2.0452115535736084, |
| "loss": 3.5109, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -17.198644638061523, |
| "rewards/margins": 3.253472328186035, |
| "rewards/rejected": -20.452116012573242, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.23296579021769862, |
| "grad_norm": 74.0515901650371, |
| "learning_rate": 7.586031423700457e-07, |
| "logits/chosen": -1.0166749954223633, |
| "logits/rejected": -1.0148067474365234, |
| "logps/chosen": -1.6628804206848145, |
| "logps/rejected": -1.8891874551773071, |
| "loss": 3.9207, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -16.62880516052246, |
| "rewards/margins": 2.263070821762085, |
| "rewards/rejected": -18.891874313354492, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.23522759400621995, |
| "grad_norm": 75.20532489401907, |
| "learning_rate": 7.571895985772e-07, |
| "logits/chosen": -0.9868625402450562, |
| "logits/rejected": -0.9866358041763306, |
| "logps/chosen": -1.6601110696792603, |
| "logps/rejected": -1.9702361822128296, |
| "loss": 3.5455, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -16.601110458374023, |
| "rewards/margins": 3.1012516021728516, |
| "rewards/rejected": -19.702362060546875, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.23748939779474132, |
| "grad_norm": 71.44625464382334, |
| "learning_rate": 7.557536873964661e-07, |
| "logits/chosen": -1.0506460666656494, |
| "logits/rejected": -1.0464547872543335, |
| "logps/chosen": -1.9673895835876465, |
| "logps/rejected": -2.150362491607666, |
| "loss": 4.3648, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -19.67389678955078, |
| "rewards/margins": 1.829728603363037, |
| "rewards/rejected": -21.503625869750977, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.23975120158326266, |
| "grad_norm": 64.29011593779744, |
| "learning_rate": 7.542954987453069e-07, |
| "logits/chosen": -1.01687753200531, |
| "logits/rejected": -1.0152884721755981, |
| "logps/chosen": -1.7988362312316895, |
| "logps/rejected": -2.022243022918701, |
| "loss": 3.6928, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -17.98836326599121, |
| "rewards/margins": 2.2340688705444336, |
| "rewards/rejected": -20.222431182861328, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.242013005371784, |
| "grad_norm": 71.33174561105508, |
| "learning_rate": 7.528151239362108e-07, |
| "logits/chosen": -1.035961627960205, |
| "logits/rejected": -1.0357584953308105, |
| "logps/chosen": -1.9293386936187744, |
| "logps/rejected": -2.1782901287078857, |
| "loss": 3.773, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -19.29338836669922, |
| "rewards/margins": 2.4895126819610596, |
| "rewards/rejected": -21.782901763916016, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.24427480916030533, |
| "grad_norm": 80.81275558049109, |
| "learning_rate": 7.513126556709748e-07, |
| "logits/chosen": -1.0150071382522583, |
| "logits/rejected": -0.9945900440216064, |
| "logps/chosen": -1.8521547317504883, |
| "logps/rejected": -2.231255054473877, |
| "loss": 3.4008, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -18.521547317504883, |
| "rewards/margins": 3.7910025119781494, |
| "rewards/rejected": -22.312549591064453, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.2465366129488267, |
| "grad_norm": 83.79674539481208, |
| "learning_rate": 7.497881880348984e-07, |
| "logits/chosen": -0.9941821098327637, |
| "logits/rejected": -0.9765617847442627, |
| "logps/chosen": -1.8816341161727905, |
| "logps/rejected": -2.1459474563598633, |
| "loss": 3.7964, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -18.816341400146484, |
| "rewards/margins": 2.6431331634521484, |
| "rewards/rejected": -21.459474563598633, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.24879841673734804, |
| "grad_norm": 94.74147514463534, |
| "learning_rate": 7.482418164908931e-07, |
| "logits/chosen": -1.0146708488464355, |
| "logits/rejected": -1.0124180316925049, |
| "logps/chosen": -1.972477912902832, |
| "logps/rejected": -2.1618151664733887, |
| "loss": 4.1515, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -19.724777221679688, |
| "rewards/margins": 1.8933711051940918, |
| "rewards/rejected": -21.618152618408203, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2510602205258694, |
| "grad_norm": 75.01460898156064, |
| "learning_rate": 7.466736378735035e-07, |
| "logits/chosen": -0.9929904937744141, |
| "logits/rejected": -1.0005165338516235, |
| "logps/chosen": -1.9998325109481812, |
| "logps/rejected": -2.2924444675445557, |
| "loss": 3.5312, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -19.99832534790039, |
| "rewards/margins": 2.926118850708008, |
| "rewards/rejected": -22.92444610595703, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2533220243143907, |
| "grad_norm": 78.18245291370165, |
| "learning_rate": 7.450837503828439e-07, |
| "logits/chosen": -0.9820634126663208, |
| "logits/rejected": -0.9711620211601257, |
| "logps/chosen": -2.0978574752807617, |
| "logps/rejected": -2.4662513732910156, |
| "loss": 3.5141, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -20.978572845458984, |
| "rewards/margins": 3.6839394569396973, |
| "rewards/rejected": -24.662513732910156, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.2555838281029121, |
| "grad_norm": 67.13686530078938, |
| "learning_rate": 7.43472253578449e-07, |
| "logits/chosen": -0.9848591089248657, |
| "logits/rejected": -0.9883791208267212, |
| "logps/chosen": -1.8714666366577148, |
| "logps/rejected": -2.1671502590179443, |
| "loss": 3.6048, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -18.714664459228516, |
| "rewards/margins": 2.9568369388580322, |
| "rewards/rejected": -21.6715030670166, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.2578456318914334, |
| "grad_norm": 73.56377763038063, |
| "learning_rate": 7.418392483730389e-07, |
| "logits/chosen": -0.9991137981414795, |
| "logits/rejected": -1.0059112310409546, |
| "logps/chosen": -2.052945137023926, |
| "logps/rejected": -2.341433048248291, |
| "loss": 3.8262, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -20.529449462890625, |
| "rewards/margins": 2.8848838806152344, |
| "rewards/rejected": -23.41433334350586, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.26010743567995476, |
| "grad_norm": 59.89075032149932, |
| "learning_rate": 7.401848370262012e-07, |
| "logits/chosen": -1.0506592988967896, |
| "logits/rejected": -1.034238338470459, |
| "logps/chosen": -2.0077497959136963, |
| "logps/rejected": -2.254788875579834, |
| "loss": 3.6761, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -20.07750129699707, |
| "rewards/margins": 2.470388174057007, |
| "rewards/rejected": -22.547889709472656, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2623692394684761, |
| "grad_norm": 83.49021187119062, |
| "learning_rate": 7.385091231379856e-07, |
| "logits/chosen": -1.0015934705734253, |
| "logits/rejected": -1.0052032470703125, |
| "logps/chosen": -2.1289501190185547, |
| "logps/rejected": -2.431809663772583, |
| "loss": 3.8092, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -21.28950309753418, |
| "rewards/margins": 3.0285940170288086, |
| "rewards/rejected": -24.318098068237305, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.26463104325699743, |
| "grad_norm": 80.00525356848313, |
| "learning_rate": 7.368122116424182e-07, |
| "logits/chosen": -0.9749897718429565, |
| "logits/rejected": -0.976076602935791, |
| "logps/chosen": -2.118309736251831, |
| "logps/rejected": -2.3720223903656006, |
| "loss": 4.0039, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -21.18309783935547, |
| "rewards/margins": 2.5371272563934326, |
| "rewards/rejected": -23.720224380493164, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2668928470455188, |
| "grad_norm": 85.51204116705796, |
| "learning_rate": 7.350942088009289e-07, |
| "logits/chosen": -1.0251755714416504, |
| "logits/rejected": -1.028438687324524, |
| "logps/chosen": -2.1024088859558105, |
| "logps/rejected": -2.394990921020508, |
| "loss": 3.3893, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -21.024085998535156, |
| "rewards/margins": 2.9258241653442383, |
| "rewards/rejected": -23.94991111755371, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.26915465083404017, |
| "grad_norm": 93.8888274811386, |
| "learning_rate": 7.333552221956986e-07, |
| "logits/chosen": -1.137995958328247, |
| "logits/rejected": -1.1309608221054077, |
| "logps/chosen": -2.220576524734497, |
| "logps/rejected": -2.5518410205841064, |
| "loss": 3.7665, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -22.205766677856445, |
| "rewards/margins": 3.3126463890075684, |
| "rewards/rejected": -25.51841163635254, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2714164546225615, |
| "grad_norm": 82.4376030791516, |
| "learning_rate": 7.315953607229217e-07, |
| "logits/chosen": -1.0302393436431885, |
| "logits/rejected": -1.0388896465301514, |
| "logps/chosen": -2.299226760864258, |
| "logps/rejected": -2.6036436557769775, |
| "loss": 3.6506, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -22.992267608642578, |
| "rewards/margins": 3.044168710708618, |
| "rewards/rejected": -26.03643798828125, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.27367825841108284, |
| "grad_norm": 75.10951235795113, |
| "learning_rate": 7.298147345859869e-07, |
| "logits/chosen": -1.0442880392074585, |
| "logits/rejected": -1.0543183088302612, |
| "logps/chosen": -2.143893003463745, |
| "logps/rejected": -2.4186229705810547, |
| "loss": 3.6677, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -21.438934326171875, |
| "rewards/margins": 2.747298240661621, |
| "rewards/rejected": -24.186229705810547, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.2759400621996042, |
| "grad_norm": 90.5899791792264, |
| "learning_rate": 7.280134552885762e-07, |
| "logits/chosen": -1.0085413455963135, |
| "logits/rejected": -0.9950270056724548, |
| "logps/chosen": -2.2459306716918945, |
| "logps/rejected": -2.5612874031066895, |
| "loss": 3.674, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -22.459308624267578, |
| "rewards/margins": 3.153568744659424, |
| "rewards/rejected": -25.612876892089844, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.2782018659881255, |
| "grad_norm": 64.12127061593394, |
| "learning_rate": 7.261916356276831e-07, |
| "logits/chosen": -1.0203675031661987, |
| "logits/rejected": -1.0163829326629639, |
| "logps/chosen": -2.314136028289795, |
| "logps/rejected": -2.7963342666625977, |
| "loss": 2.9003, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -23.141361236572266, |
| "rewards/margins": 4.821981906890869, |
| "rewards/rejected": -27.963342666625977, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2804636697766469, |
| "grad_norm": 67.5121542894354, |
| "learning_rate": 7.243493896865486e-07, |
| "logits/chosen": -1.0232429504394531, |
| "logits/rejected": -1.0271434783935547, |
| "logps/chosen": -2.094038486480713, |
| "logps/rejected": -2.34799861907959, |
| "loss": 3.6745, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -20.940383911132812, |
| "rewards/margins": 2.5396037101745605, |
| "rewards/rejected": -23.47998809814453, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2827254735651682, |
| "grad_norm": 94.88612662427548, |
| "learning_rate": 7.224868328275169e-07, |
| "logits/chosen": -1.0053532123565674, |
| "logits/rejected": -1.00931715965271, |
| "logps/chosen": -2.22296142578125, |
| "logps/rejected": -2.5190277099609375, |
| "loss": 3.8801, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -22.229612350463867, |
| "rewards/margins": 2.960665464401245, |
| "rewards/rejected": -25.190279006958008, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.28498727735368956, |
| "grad_norm": 155.20517774124164, |
| "learning_rate": 7.206040816848126e-07, |
| "logits/chosen": -1.0197092294692993, |
| "logits/rejected": -1.013099193572998, |
| "logps/chosen": -2.4244911670684814, |
| "logps/rejected": -2.6366748809814453, |
| "loss": 4.3686, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -24.244909286499023, |
| "rewards/margins": 2.1218347549438477, |
| "rewards/rejected": -26.36674690246582, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2872490811422109, |
| "grad_norm": 84.56335295481283, |
| "learning_rate": 7.187012541572356e-07, |
| "logits/chosen": -1.1143990755081177, |
| "logits/rejected": -1.1029709577560425, |
| "logps/chosen": -2.4256863594055176, |
| "logps/rejected": -2.823179244995117, |
| "loss": 3.2922, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -24.25686264038086, |
| "rewards/margins": 3.9749279022216797, |
| "rewards/rejected": -28.23179054260254, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.28951088493073224, |
| "grad_norm": 87.76435547733897, |
| "learning_rate": 7.167784694007791e-07, |
| "logits/chosen": -1.0146347284317017, |
| "logits/rejected": -1.0320950746536255, |
| "logps/chosen": -2.34283447265625, |
| "logps/rejected": -2.685307502746582, |
| "loss": 3.5692, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -23.4283447265625, |
| "rewards/margins": 3.424734115600586, |
| "rewards/rejected": -26.85307502746582, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2917726887192536, |
| "grad_norm": 74.4299989128208, |
| "learning_rate": 7.148358478211682e-07, |
| "logits/chosen": -1.0819629430770874, |
| "logits/rejected": -1.0712792873382568, |
| "logps/chosen": -2.465446949005127, |
| "logps/rejected": -2.8680379390716553, |
| "loss": 3.0557, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -24.65446662902832, |
| "rewards/margins": 4.025911808013916, |
| "rewards/rejected": -28.68037986755371, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.29403449250777497, |
| "grad_norm": 68.46333390817632, |
| "learning_rate": 7.128735110663187e-07, |
| "logits/chosen": -1.0613350868225098, |
| "logits/rejected": -1.029494285583496, |
| "logps/chosen": -2.2105801105499268, |
| "logps/rejected": -2.5904829502105713, |
| "loss": 3.3787, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -22.105798721313477, |
| "rewards/margins": 3.7990307807922363, |
| "rewards/rejected": -25.904829025268555, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 98.52621039177454, |
| "learning_rate": 7.108915820187211e-07, |
| "logits/chosen": -1.0233975648880005, |
| "logits/rejected": -1.0252894163131714, |
| "logps/chosen": -2.538795232772827, |
| "logps/rejected": -2.9489595890045166, |
| "loss": 3.5147, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -25.38795280456543, |
| "rewards/margins": 4.10164213180542, |
| "rewards/rejected": -29.489593505859375, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.29855810008481765, |
| "grad_norm": 100.09208083182116, |
| "learning_rate": 7.088901847877447e-07, |
| "logits/chosen": -0.9954769611358643, |
| "logits/rejected": -1.0030215978622437, |
| "logps/chosen": -2.4815452098846436, |
| "logps/rejected": -2.736799955368042, |
| "loss": 4.4467, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -24.815452575683594, |
| "rewards/margins": 2.55254864692688, |
| "rewards/rejected": -27.367998123168945, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.300819903873339, |
| "grad_norm": 84.50154084317974, |
| "learning_rate": 7.068694447018658e-07, |
| "logits/chosen": -1.0301378965377808, |
| "logits/rejected": -1.0450048446655273, |
| "logps/chosen": -2.5099587440490723, |
| "logps/rejected": -2.9627416133880615, |
| "loss": 3.1551, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -25.09958839416504, |
| "rewards/margins": 4.527829170227051, |
| "rewards/rejected": -29.627418518066406, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.3030817076618603, |
| "grad_norm": 101.0196603183953, |
| "learning_rate": 7.048294883008199e-07, |
| "logits/chosen": -1.0414271354675293, |
| "logits/rejected": -1.0482277870178223, |
| "logps/chosen": -2.413658857345581, |
| "logps/rejected": -2.7206525802612305, |
| "loss": 3.5561, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -24.13658905029297, |
| "rewards/margins": 3.0699357986450195, |
| "rewards/rejected": -27.206523895263672, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.3053435114503817, |
| "grad_norm": 95.0848682674735, |
| "learning_rate": 7.027704433276776e-07, |
| "logits/chosen": -0.9876019954681396, |
| "logits/rejected": -0.9939224123954773, |
| "logps/chosen": -2.4932754039764404, |
| "logps/rejected": -2.9078118801116943, |
| "loss": 3.3544, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -24.932754516601562, |
| "rewards/margins": 4.145364761352539, |
| "rewards/rejected": -29.07811737060547, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.307605315238903, |
| "grad_norm": 105.4740106322944, |
| "learning_rate": 7.006924387208452e-07, |
| "logits/chosen": -0.9797443151473999, |
| "logits/rejected": -0.967667818069458, |
| "logps/chosen": -2.409311532974243, |
| "logps/rejected": -2.7358930110931396, |
| "loss": 3.4948, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -24.093116760253906, |
| "rewards/margins": 3.265815496444702, |
| "rewards/rejected": -27.358928680419922, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.30986711902742436, |
| "grad_norm": 76.39737789572504, |
| "learning_rate": 6.985956046059904e-07, |
| "logits/chosen": -0.9616111516952515, |
| "logits/rejected": -0.9658017158508301, |
| "logps/chosen": -2.336418628692627, |
| "logps/rejected": -2.7663118839263916, |
| "loss": 3.4032, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -23.364187240600586, |
| "rewards/margins": 4.298930644989014, |
| "rewards/rejected": -27.663118362426758, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.31212892281594573, |
| "grad_norm": 65.49308975598159, |
| "learning_rate": 6.964800722878945e-07, |
| "logits/chosen": -0.9257555603981018, |
| "logits/rejected": -0.9293062686920166, |
| "logps/chosen": -2.520350456237793, |
| "logps/rejected": -3.011868715286255, |
| "loss": 2.9539, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -25.20350456237793, |
| "rewards/margins": 4.915180683135986, |
| "rewards/rejected": -30.118684768676758, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.31439072660446704, |
| "grad_norm": 70.82118930505452, |
| "learning_rate": 6.943459742422287e-07, |
| "logits/chosen": -0.9633641839027405, |
| "logits/rejected": -0.9400511980056763, |
| "logps/chosen": -2.5129337310791016, |
| "logps/rejected": -2.921888589859009, |
| "loss": 3.524, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -25.12933921813965, |
| "rewards/margins": 4.089549541473389, |
| "rewards/rejected": -29.218887329101562, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.3166525303929884, |
| "grad_norm": 81.70116714789167, |
| "learning_rate": 6.921934441072597e-07, |
| "logits/chosen": -1.03174889087677, |
| "logits/rejected": -1.023061752319336, |
| "logps/chosen": -2.756772994995117, |
| "logps/rejected": -3.09798526763916, |
| "loss": 3.7751, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -27.567729949951172, |
| "rewards/margins": 3.412126302719116, |
| "rewards/rejected": -30.979854583740234, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3189143341815098, |
| "grad_norm": 116.29276151269757, |
| "learning_rate": 6.900226166754807e-07, |
| "logits/chosen": -0.9728091955184937, |
| "logits/rejected": -0.9851078987121582, |
| "logps/chosen": -2.8437376022338867, |
| "logps/rejected": -3.1122090816497803, |
| "loss": 4.2026, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -28.437374114990234, |
| "rewards/margins": 2.6847147941589355, |
| "rewards/rejected": -31.122089385986328, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3211761379700311, |
| "grad_norm": 81.1094387371339, |
| "learning_rate": 6.8783362788517e-07, |
| "logits/chosen": -0.9725527763366699, |
| "logits/rejected": -0.9806532263755798, |
| "logps/chosen": -2.7617523670196533, |
| "logps/rejected": -3.1742238998413086, |
| "loss": 3.9007, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -27.617523193359375, |
| "rewards/margins": 4.124715328216553, |
| "rewards/rejected": -31.74224090576172, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.32343794175855245, |
| "grad_norm": 83.8384180266702, |
| "learning_rate": 6.856266148118796e-07, |
| "logits/chosen": -0.9712091684341431, |
| "logits/rejected": -0.9775328636169434, |
| "logps/chosen": -2.507340431213379, |
| "logps/rejected": -2.992020606994629, |
| "loss": 3.1458, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -25.07340431213379, |
| "rewards/margins": 4.846798896789551, |
| "rewards/rejected": -29.92020606994629, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3256997455470738, |
| "grad_norm": 95.75252245013684, |
| "learning_rate": 6.834017156598512e-07, |
| "logits/chosen": -0.9558267593383789, |
| "logits/rejected": -0.952300488948822, |
| "logps/chosen": -2.7634055614471436, |
| "logps/rejected": -3.159095048904419, |
| "loss": 3.5389, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -27.634056091308594, |
| "rewards/margins": 3.9568943977355957, |
| "rewards/rejected": -31.5909481048584, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3279615493355951, |
| "grad_norm": 66.20083759659813, |
| "learning_rate": 6.811590697533607e-07, |
| "logits/chosen": -1.021683692932129, |
| "logits/rejected": -1.030705451965332, |
| "logps/chosen": -2.7327919006347656, |
| "logps/rejected": -3.0889930725097656, |
| "loss": 3.5802, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -27.327922821044922, |
| "rewards/margins": 3.562011241912842, |
| "rewards/rejected": -30.889930725097656, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3302233531241165, |
| "grad_norm": 97.21258496539569, |
| "learning_rate": 6.788988175279951e-07, |
| "logits/chosen": -0.9693772196769714, |
| "logits/rejected": -0.9559890031814575, |
| "logps/chosen": -2.6974761486053467, |
| "logps/rejected": -3.066542148590088, |
| "loss": 3.7983, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -26.974760055541992, |
| "rewards/margins": 3.6906626224517822, |
| "rewards/rejected": -30.665422439575195, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.3324851569126378, |
| "grad_norm": 109.59653360197825, |
| "learning_rate": 6.766211005218577e-07, |
| "logits/chosen": -0.9583339095115662, |
| "logits/rejected": -0.9542748332023621, |
| "logps/chosen": -2.6948695182800293, |
| "logps/rejected": -3.238664150238037, |
| "loss": 3.0583, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -26.948694229125977, |
| "rewards/margins": 5.43794584274292, |
| "rewards/rejected": -32.38664245605469, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.33474696070115917, |
| "grad_norm": 83.0664156941931, |
| "learning_rate": 6.743260613667047e-07, |
| "logits/chosen": -1.0542542934417725, |
| "logits/rejected": -1.0388411283493042, |
| "logps/chosen": -2.708282709121704, |
| "logps/rejected": -3.167202949523926, |
| "loss": 3.2972, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -27.082826614379883, |
| "rewards/margins": 4.589202404022217, |
| "rewards/rejected": -31.672027587890625, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.33700876448968053, |
| "grad_norm": 54.329954302423566, |
| "learning_rate": 6.720138437790139e-07, |
| "logits/chosen": -1.0130287408828735, |
| "logits/rejected": -1.0046091079711914, |
| "logps/chosen": -2.6365139484405518, |
| "logps/rejected": -3.088283061981201, |
| "loss": 3.0963, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -26.36513900756836, |
| "rewards/margins": 4.517690658569336, |
| "rewards/rejected": -30.882829666137695, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.33927056827820185, |
| "grad_norm": 103.86460592347143, |
| "learning_rate": 6.696845925509848e-07, |
| "logits/chosen": -1.0333694219589233, |
| "logits/rejected": -1.0335674285888672, |
| "logps/chosen": -2.7684454917907715, |
| "logps/rejected": -3.083521842956543, |
| "loss": 3.8223, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -27.68445587158203, |
| "rewards/margins": 3.1507630348205566, |
| "rewards/rejected": -30.83521842956543, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3415323720667232, |
| "grad_norm": 93.51507593691238, |
| "learning_rate": 6.673384535414718e-07, |
| "logits/chosen": -1.0419607162475586, |
| "logits/rejected": -1.0317517518997192, |
| "logps/chosen": -2.868422031402588, |
| "logps/rejected": -3.1678481101989746, |
| "loss": 3.9666, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -28.684223175048828, |
| "rewards/margins": 2.9942572116851807, |
| "rewards/rejected": -31.678478240966797, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3437941758552446, |
| "grad_norm": 134.42512568099087, |
| "learning_rate": 6.649755736668511e-07, |
| "logits/chosen": -0.974323570728302, |
| "logits/rejected": -0.9719677567481995, |
| "logps/chosen": -2.5241260528564453, |
| "logps/rejected": -2.9137468338012695, |
| "loss": 2.8286, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -25.241260528564453, |
| "rewards/margins": 3.896209716796875, |
| "rewards/rejected": -29.137470245361328, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3460559796437659, |
| "grad_norm": 86.3337816462208, |
| "learning_rate": 6.625961008918192e-07, |
| "logits/chosen": -0.986197829246521, |
| "logits/rejected": -0.9864940643310547, |
| "logps/chosen": -2.601165294647217, |
| "logps/rejected": -3.0423331260681152, |
| "loss": 3.0146, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -26.011653900146484, |
| "rewards/margins": 4.411678314208984, |
| "rewards/rejected": -30.4233341217041, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.34831778343228725, |
| "grad_norm": 102.48426546292868, |
| "learning_rate": 6.602001842201289e-07, |
| "logits/chosen": -0.9800938367843628, |
| "logits/rejected": -0.9895581603050232, |
| "logps/chosen": -2.5859339237213135, |
| "logps/rejected": -2.865299701690674, |
| "loss": 3.9638, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -25.859336853027344, |
| "rewards/margins": 2.7936594486236572, |
| "rewards/rejected": -28.652997970581055, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3505795872208086, |
| "grad_norm": 104.68446919661847, |
| "learning_rate": 6.577879736852571e-07, |
| "logits/chosen": -0.9982212781906128, |
| "logits/rejected": -1.0046367645263672, |
| "logps/chosen": -2.649374485015869, |
| "logps/rejected": -2.9364640712738037, |
| "loss": 3.8496, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -26.493745803833008, |
| "rewards/margins": 2.870893955230713, |
| "rewards/rejected": -29.364643096923828, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.35284139100932993, |
| "grad_norm": 104.92988420265864, |
| "learning_rate": 6.553596203410112e-07, |
| "logits/chosen": -1.0082954168319702, |
| "logits/rejected": -1.0111855268478394, |
| "logps/chosen": -2.642954111099243, |
| "logps/rejected": -3.162132740020752, |
| "loss": 2.5535, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -26.429540634155273, |
| "rewards/margins": 5.1917853355407715, |
| "rewards/rejected": -31.621326446533203, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3551031947978513, |
| "grad_norm": 76.25729279383924, |
| "learning_rate": 6.529152762520688e-07, |
| "logits/chosen": -1.010929822921753, |
| "logits/rejected": -1.012401819229126, |
| "logps/chosen": -2.7833664417266846, |
| "logps/rejected": -3.1698460578918457, |
| "loss": 3.4851, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -27.83366584777832, |
| "rewards/margins": 3.8647947311401367, |
| "rewards/rejected": -31.69845962524414, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3573649985863726, |
| "grad_norm": 91.64977048048202, |
| "learning_rate": 6.504550944844558e-07, |
| "logits/chosen": -0.9399479627609253, |
| "logits/rejected": -0.9675872325897217, |
| "logps/chosen": -2.6605355739593506, |
| "logps/rejected": -3.0566985607147217, |
| "loss": 3.1926, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -26.60535430908203, |
| "rewards/margins": 3.961630344390869, |
| "rewards/rejected": -30.566986083984375, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.359626802374894, |
| "grad_norm": 91.26525865623015, |
| "learning_rate": 6.479792290959613e-07, |
| "logits/chosen": -0.9715840816497803, |
| "logits/rejected": -0.9836963415145874, |
| "logps/chosen": -2.655123710632324, |
| "logps/rejected": -3.1809985637664795, |
| "loss": 3.1864, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -26.551239013671875, |
| "rewards/margins": 5.258747100830078, |
| "rewards/rejected": -31.80998420715332, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.36188860616341534, |
| "grad_norm": 77.20624956215188, |
| "learning_rate": 6.454878351264906e-07, |
| "logits/chosen": -0.9734132885932922, |
| "logits/rejected": -0.9633912444114685, |
| "logps/chosen": -2.620572805404663, |
| "logps/rejected": -3.0621609687805176, |
| "loss": 3.3031, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -26.205730438232422, |
| "rewards/margins": 4.415881156921387, |
| "rewards/rejected": -30.621610641479492, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36415040995193665, |
| "grad_norm": 94.30228281712074, |
| "learning_rate": 6.429810685883565e-07, |
| "logits/chosen": -1.0244390964508057, |
| "logits/rejected": -1.0341304540634155, |
| "logps/chosen": -2.815182685852051, |
| "logps/rejected": -3.2361655235290527, |
| "loss": 2.7388, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -28.15182876586914, |
| "rewards/margins": 4.209825038909912, |
| "rewards/rejected": -32.361656188964844, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.366412213740458, |
| "grad_norm": 116.19130395085337, |
| "learning_rate": 6.404590864565088e-07, |
| "logits/chosen": -0.9736944437026978, |
| "logits/rejected": -0.9617501497268677, |
| "logps/chosen": -2.7973079681396484, |
| "logps/rejected": -3.07376766204834, |
| "loss": 3.8692, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -27.973079681396484, |
| "rewards/margins": 2.7645981311798096, |
| "rewards/rejected": -30.73767852783203, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3686740175289794, |
| "grad_norm": 81.52905794953793, |
| "learning_rate": 6.379220466587063e-07, |
| "logits/chosen": -0.9999153017997742, |
| "logits/rejected": -0.9712975025177002, |
| "logps/chosen": -2.7478442192077637, |
| "logps/rejected": -3.222566843032837, |
| "loss": 3.2696, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -27.478445053100586, |
| "rewards/margins": 4.747226238250732, |
| "rewards/rejected": -32.225669860839844, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3709358213175007, |
| "grad_norm": 120.0020137912241, |
| "learning_rate": 6.353701080656254e-07, |
| "logits/chosen": -0.980323314666748, |
| "logits/rejected": -1.0064855813980103, |
| "logps/chosen": -2.868739604949951, |
| "logps/rejected": -3.228687286376953, |
| "loss": 3.4866, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -28.687393188476562, |
| "rewards/margins": 3.599480390548706, |
| "rewards/rejected": -32.2868766784668, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.37319762510602206, |
| "grad_norm": 76.2628133649645, |
| "learning_rate": 6.32803430480913e-07, |
| "logits/chosen": -0.9936685562133789, |
| "logits/rejected": -0.998755693435669, |
| "logps/chosen": -2.7757811546325684, |
| "logps/rejected": -3.2076919078826904, |
| "loss": 3.3059, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -27.757808685302734, |
| "rewards/margins": 4.319108963012695, |
| "rewards/rejected": -32.07691955566406, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3754594288945434, |
| "grad_norm": 94.14547576362092, |
| "learning_rate": 6.302221746311782e-07, |
| "logits/chosen": -0.9984323978424072, |
| "logits/rejected": -0.9655094146728516, |
| "logps/chosen": -2.6705729961395264, |
| "logps/rejected": -3.1013717651367188, |
| "loss": 3.5603, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -26.705730438232422, |
| "rewards/margins": 4.307986736297607, |
| "rewards/rejected": -31.013717651367188, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.37772123268306473, |
| "grad_norm": 98.38930298766635, |
| "learning_rate": 6.276265021559288e-07, |
| "logits/chosen": -1.0194363594055176, |
| "logits/rejected": -1.0137174129486084, |
| "logps/chosen": -2.913635015487671, |
| "logps/rejected": -3.219327449798584, |
| "loss": 3.9403, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -29.136350631713867, |
| "rewards/margins": 3.0569217205047607, |
| "rewards/rejected": -32.193275451660156, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3799830364715861, |
| "grad_norm": 80.88897995132001, |
| "learning_rate": 6.250165755974487e-07, |
| "logits/chosen": -0.9634656310081482, |
| "logits/rejected": -0.972878634929657, |
| "logps/chosen": -2.8597655296325684, |
| "logps/rejected": -3.2424063682556152, |
| "loss": 3.4182, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -28.59765625, |
| "rewards/margins": 3.8264071941375732, |
| "rewards/rejected": -32.42406463623047, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3822448402601074, |
| "grad_norm": 76.80218510989977, |
| "learning_rate": 6.223925583906192e-07, |
| "logits/chosen": -1.005441665649414, |
| "logits/rejected": -1.014135718345642, |
| "logps/chosen": -2.8562896251678467, |
| "logps/rejected": -3.3525795936584473, |
| "loss": 3.0492, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -28.562894821166992, |
| "rewards/margins": 4.962900161743164, |
| "rewards/rejected": -33.525794982910156, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3845066440486288, |
| "grad_norm": 76.07958470639606, |
| "learning_rate": 6.19754614852685e-07, |
| "logits/chosen": -1.013322353363037, |
| "logits/rejected": -1.016672134399414, |
| "logps/chosen": -2.7543845176696777, |
| "logps/rejected": -3.1926443576812744, |
| "loss": 3.215, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -27.543842315673828, |
| "rewards/margins": 4.382599830627441, |
| "rewards/rejected": -31.926441192626953, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.38676844783715014, |
| "grad_norm": 99.45087355333845, |
| "learning_rate": 6.171029101729644e-07, |
| "logits/chosen": -0.9391031265258789, |
| "logits/rejected": -0.9616649746894836, |
| "logps/chosen": -3.042638063430786, |
| "logps/rejected": -3.5252275466918945, |
| "loss": 3.2546, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -30.426380157470703, |
| "rewards/margins": 4.825897693634033, |
| "rewards/rejected": -35.25227737426758, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.38903025162567145, |
| "grad_norm": 85.11197223778332, |
| "learning_rate": 6.144376104025055e-07, |
| "logits/chosen": -1.0180509090423584, |
| "logits/rejected": -1.0034980773925781, |
| "logps/chosen": -2.8050010204315186, |
| "logps/rejected": -3.2974133491516113, |
| "loss": 2.9998, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -28.050006866455078, |
| "rewards/margins": 4.924124717712402, |
| "rewards/rejected": -32.9741325378418, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3912920554141928, |
| "grad_norm": 101.05359206277465, |
| "learning_rate": 6.117588824436873e-07, |
| "logits/chosen": -1.0196110010147095, |
| "logits/rejected": -1.0431230068206787, |
| "logps/chosen": -2.8156418800354004, |
| "logps/rejected": -3.218076229095459, |
| "loss": 3.4264, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -28.15641975402832, |
| "rewards/margins": 4.024340629577637, |
| "rewards/rejected": -32.180763244628906, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3935538592027142, |
| "grad_norm": 103.50825244083374, |
| "learning_rate": 6.090668940397688e-07, |
| "logits/chosen": -0.9689127206802368, |
| "logits/rejected": -0.9795331358909607, |
| "logps/chosen": -2.831442356109619, |
| "logps/rejected": -3.2767508029937744, |
| "loss": 3.475, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -28.314422607421875, |
| "rewards/margins": 4.4530839920043945, |
| "rewards/rejected": -32.76750946044922, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3958156629912355, |
| "grad_norm": 95.78558182335533, |
| "learning_rate": 6.063618137643844e-07, |
| "logits/chosen": -0.9743781089782715, |
| "logits/rejected": -0.9708334803581238, |
| "logps/chosen": -2.8041203022003174, |
| "logps/rejected": -3.2117085456848145, |
| "loss": 3.3677, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -28.041202545166016, |
| "rewards/margins": 4.075882911682129, |
| "rewards/rejected": -32.117088317871094, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.39807746677975686, |
| "grad_norm": 97.91758087524954, |
| "learning_rate": 6.03643811010988e-07, |
| "logits/chosen": -0.9981604814529419, |
| "logits/rejected": -1.0190309286117554, |
| "logps/chosen": -3.009575843811035, |
| "logps/rejected": -3.4792652130126953, |
| "loss": 2.9463, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -30.095760345458984, |
| "rewards/margins": 4.696890830993652, |
| "rewards/rejected": -34.79264831542969, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.4003392705682782, |
| "grad_norm": 108.50415430037745, |
| "learning_rate": 6.009130559822453e-07, |
| "logits/chosen": -1.033525824546814, |
| "logits/rejected": -1.0412116050720215, |
| "logps/chosen": -2.977822780609131, |
| "logps/rejected": -3.300154447555542, |
| "loss": 3.8532, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -29.77822494506836, |
| "rewards/margins": 3.2233195304870605, |
| "rewards/rejected": -33.001548767089844, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.40260107435679954, |
| "grad_norm": 111.58568873914501, |
| "learning_rate": 5.981697196793758e-07, |
| "logits/chosen": -1.0652039051055908, |
| "logits/rejected": -1.0723577737808228, |
| "logps/chosen": -3.18682861328125, |
| "logps/rejected": -3.5545740127563477, |
| "loss": 3.2891, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -31.868288040161133, |
| "rewards/margins": 3.6774556636810303, |
| "rewards/rejected": -35.545745849609375, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.4048628781453209, |
| "grad_norm": 113.22029697753425, |
| "learning_rate": 5.954139738914446e-07, |
| "logits/chosen": -1.0194616317749023, |
| "logits/rejected": -1.0498051643371582, |
| "logps/chosen": -3.0097670555114746, |
| "logps/rejected": -3.4376673698425293, |
| "loss": 3.4262, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -30.09766960144043, |
| "rewards/margins": 4.279005527496338, |
| "rewards/rejected": -34.376678466796875, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.4071246819338422, |
| "grad_norm": 88.31224190243825, |
| "learning_rate": 5.92645991184605e-07, |
| "logits/chosen": -1.000518560409546, |
| "logits/rejected": -1.0053133964538574, |
| "logps/chosen": -3.138739585876465, |
| "logps/rejected": -3.564058303833008, |
| "loss": 3.3412, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -31.38739585876465, |
| "rewards/margins": 4.2531867027282715, |
| "rewards/rejected": -35.64058303833008, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4093864857223636, |
| "grad_norm": 93.89901341102379, |
| "learning_rate": 5.898659448912917e-07, |
| "logits/chosen": -0.9921884536743164, |
| "logits/rejected": -1.0198383331298828, |
| "logps/chosen": -2.974911689758301, |
| "logps/rejected": -3.3996355533599854, |
| "loss": 3.5069, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -29.74911880493164, |
| "rewards/margins": 4.247236251831055, |
| "rewards/rejected": -33.99635314941406, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.41164828951088495, |
| "grad_norm": 98.09431340991735, |
| "learning_rate": 5.870740090993676e-07, |
| "logits/chosen": -1.0371700525283813, |
| "logits/rejected": -1.0533473491668701, |
| "logps/chosen": -3.3046529293060303, |
| "logps/rejected": -3.7636566162109375, |
| "loss": 3.2028, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -33.046531677246094, |
| "rewards/margins": 4.590036392211914, |
| "rewards/rejected": -37.636566162109375, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.41391009329940626, |
| "grad_norm": 120.2568476777465, |
| "learning_rate": 5.842703586412214e-07, |
| "logits/chosen": -1.0324050188064575, |
| "logits/rejected": -1.0546314716339111, |
| "logps/chosen": -3.2569875717163086, |
| "logps/rejected": -3.61047625541687, |
| "loss": 3.8317, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -32.56987380981445, |
| "rewards/margins": 3.534888982772827, |
| "rewards/rejected": -36.104766845703125, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4161718970879276, |
| "grad_norm": 85.18463181569176, |
| "learning_rate": 5.814551690828203e-07, |
| "logits/chosen": -0.9997261762619019, |
| "logits/rejected": -1.0231051445007324, |
| "logps/chosen": -3.0839271545410156, |
| "logps/rejected": -3.5636889934539795, |
| "loss": 2.8158, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -30.83926773071289, |
| "rewards/margins": 4.797619342803955, |
| "rewards/rejected": -35.63688659667969, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.418433700876449, |
| "grad_norm": 109.35546689042273, |
| "learning_rate": 5.786286167127155e-07, |
| "logits/chosen": -1.039783000946045, |
| "logits/rejected": -1.0381850004196167, |
| "logps/chosen": -3.022249937057495, |
| "logps/rejected": -3.558924913406372, |
| "loss": 3.2005, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -30.222497940063477, |
| "rewards/margins": 5.366751670837402, |
| "rewards/rejected": -35.58924865722656, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.4206955046649703, |
| "grad_norm": 121.67840212859822, |
| "learning_rate": 5.757908785310031e-07, |
| "logits/chosen": -0.9625982046127319, |
| "logits/rejected": -0.9870297312736511, |
| "logps/chosen": -2.8242011070251465, |
| "logps/rejected": -3.244973659515381, |
| "loss": 3.4056, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -28.242015838623047, |
| "rewards/margins": 4.207724094390869, |
| "rewards/rejected": -32.44974136352539, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.42295730845349166, |
| "grad_norm": 83.36428430432693, |
| "learning_rate": 5.729421322382399e-07, |
| "logits/chosen": -0.9603241086006165, |
| "logits/rejected": -0.989672064781189, |
| "logps/chosen": -2.6210927963256836, |
| "logps/rejected": -3.126544952392578, |
| "loss": 2.8853, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -26.21092987060547, |
| "rewards/margins": 5.0545196533203125, |
| "rewards/rejected": -31.26544952392578, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.42521911224201303, |
| "grad_norm": 101.06130324143219, |
| "learning_rate": 5.700825562243163e-07, |
| "logits/chosen": -0.9474819302558899, |
| "logits/rejected": -0.9584120512008667, |
| "logps/chosen": -2.716667413711548, |
| "logps/rejected": -3.2290937900543213, |
| "loss": 2.7513, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -27.166675567626953, |
| "rewards/margins": 5.124265670776367, |
| "rewards/rejected": -32.29093933105469, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.42748091603053434, |
| "grad_norm": 96.1462570190499, |
| "learning_rate": 5.672123295572854e-07, |
| "logits/chosen": -1.0214743614196777, |
| "logits/rejected": -1.0417888164520264, |
| "logps/chosen": -2.9014813899993896, |
| "logps/rejected": -3.219127655029297, |
| "loss": 3.2058, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -29.014816284179688, |
| "rewards/margins": 3.1764631271362305, |
| "rewards/rejected": -32.19127655029297, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4297427198190557, |
| "grad_norm": 76.49202012271405, |
| "learning_rate": 5.643316319721487e-07, |
| "logits/chosen": -1.008056879043579, |
| "logits/rejected": -1.0231949090957642, |
| "logps/chosen": -3.03181791305542, |
| "logps/rejected": -3.4028077125549316, |
| "loss": 3.6319, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -30.318180084228516, |
| "rewards/margins": 3.7098987102508545, |
| "rewards/rejected": -34.028076171875, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.432004523607577, |
| "grad_norm": 103.69905256755406, |
| "learning_rate": 5.614406438596026e-07, |
| "logits/chosen": -1.0403547286987305, |
| "logits/rejected": -1.0387647151947021, |
| "logps/chosen": -3.0885541439056396, |
| "logps/rejected": -3.5047993659973145, |
| "loss": 3.5887, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -30.885540008544922, |
| "rewards/margins": 4.162451267242432, |
| "rewards/rejected": -35.04799270629883, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.4342663273960984, |
| "grad_norm": 110.46035775661962, |
| "learning_rate": 5.585395462547406e-07, |
| "logits/chosen": -0.9906610250473022, |
| "logits/rejected": -0.9895243644714355, |
| "logps/chosen": -3.050811529159546, |
| "logps/rejected": -3.3403472900390625, |
| "loss": 3.9036, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -30.50811767578125, |
| "rewards/margins": 2.8953564167022705, |
| "rewards/rejected": -33.403472900390625, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.43652813118461975, |
| "grad_norm": 101.65651790587738, |
| "learning_rate": 5.55628520825718e-07, |
| "logits/chosen": -1.0707571506500244, |
| "logits/rejected": -1.0764364004135132, |
| "logps/chosen": -3.1038661003112793, |
| "logps/rejected": -3.454857110977173, |
| "loss": 3.4261, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -31.03866195678711, |
| "rewards/margins": 3.5099072456359863, |
| "rewards/rejected": -34.54856872558594, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.43878993497314106, |
| "grad_norm": 93.93497063277395, |
| "learning_rate": 5.527077498623752e-07, |
| "logits/chosen": -1.0300379991531372, |
| "logits/rejected": -1.0473064184188843, |
| "logps/chosen": -2.991177797317505, |
| "logps/rejected": -3.39091157913208, |
| "loss": 3.1118, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -29.911775588989258, |
| "rewards/margins": 3.9973347187042236, |
| "rewards/rejected": -33.90911102294922, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.4410517387616624, |
| "grad_norm": 87.97932049560232, |
| "learning_rate": 5.497774162648228e-07, |
| "logits/chosen": -0.9984287023544312, |
| "logits/rejected": -1.0242127180099487, |
| "logps/chosen": -2.906052589416504, |
| "logps/rejected": -3.424265146255493, |
| "loss": 3.135, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -29.06052589416504, |
| "rewards/margins": 5.18212366104126, |
| "rewards/rejected": -34.24264907836914, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.4433135425501838, |
| "grad_norm": 104.21099663735023, |
| "learning_rate": 5.468377035319882e-07, |
| "logits/chosen": -1.0468281507492065, |
| "logits/rejected": -1.0517960786819458, |
| "logps/chosen": -2.991363048553467, |
| "logps/rejected": -3.451014280319214, |
| "loss": 3.1655, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -29.913631439208984, |
| "rewards/margins": 4.596511363983154, |
| "rewards/rejected": -34.51013946533203, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.4455753463387051, |
| "grad_norm": 97.96259698980502, |
| "learning_rate": 5.438887957501248e-07, |
| "logits/chosen": -0.9534517526626587, |
| "logits/rejected": -0.9541075229644775, |
| "logps/chosen": -2.9699201583862305, |
| "logps/rejected": -3.363379716873169, |
| "loss": 3.4454, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -29.699199676513672, |
| "rewards/margins": 3.934598922729492, |
| "rewards/rejected": -33.63379669189453, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.44783715012722647, |
| "grad_norm": 81.95675095435759, |
| "learning_rate": 5.409308775812844e-07, |
| "logits/chosen": -1.0043997764587402, |
| "logits/rejected": -1.0169973373413086, |
| "logps/chosen": -3.0810163021087646, |
| "logps/rejected": -3.500107765197754, |
| "loss": 3.3197, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -30.810165405273438, |
| "rewards/margins": 4.190912246704102, |
| "rewards/rejected": -35.001075744628906, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.45009895391574783, |
| "grad_norm": 78.86431644460743, |
| "learning_rate": 5.379641342517541e-07, |
| "logits/chosen": -1.0426075458526611, |
| "logits/rejected": -1.064549207687378, |
| "logps/chosen": -2.9156980514526367, |
| "logps/rejected": -3.4033756256103516, |
| "loss": 3.1621, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -29.156984329223633, |
| "rewards/margins": 4.876772403717041, |
| "rewards/rejected": -34.03376007080078, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.45236075770426915, |
| "grad_norm": 78.79345485135647, |
| "learning_rate": 5.349887515404564e-07, |
| "logits/chosen": -1.0214426517486572, |
| "logits/rejected": -1.0430337190628052, |
| "logps/chosen": -3.1084158420562744, |
| "logps/rejected": -3.697129964828491, |
| "loss": 2.5793, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -31.08416175842285, |
| "rewards/margins": 5.887137413024902, |
| "rewards/rejected": -36.97129821777344, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4546225614927905, |
| "grad_norm": 84.91022455477744, |
| "learning_rate": 5.320049157673163e-07, |
| "logits/chosen": -0.9889032244682312, |
| "logits/rejected": -0.9852777719497681, |
| "logps/chosen": -2.9240407943725586, |
| "logps/rejected": -3.4134063720703125, |
| "loss": 2.8051, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -29.240407943725586, |
| "rewards/margins": 4.8936567306518555, |
| "rewards/rejected": -34.13405990600586, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4568843652813118, |
| "grad_norm": 84.77083592931683, |
| "learning_rate": 5.290128137815938e-07, |
| "logits/chosen": -1.0504995584487915, |
| "logits/rejected": -1.0646427869796753, |
| "logps/chosen": -3.179389476776123, |
| "logps/rejected": -3.651752233505249, |
| "loss": 2.8529, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -31.793895721435547, |
| "rewards/margins": 4.723628997802734, |
| "rewards/rejected": -36.51752471923828, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4591461690698332, |
| "grad_norm": 74.57762667630237, |
| "learning_rate": 5.260126329501828e-07, |
| "logits/chosen": -1.0635945796966553, |
| "logits/rejected": -1.0678863525390625, |
| "logps/chosen": -3.0903306007385254, |
| "logps/rejected": -3.723266363143921, |
| "loss": 2.6491, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -30.903305053710938, |
| "rewards/margins": 6.329358100891113, |
| "rewards/rejected": -37.232662200927734, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.46140797285835455, |
| "grad_norm": 143.06703544138904, |
| "learning_rate": 5.230045611458789e-07, |
| "logits/chosen": -0.9957941770553589, |
| "logits/rejected": -1.0184533596038818, |
| "logps/chosen": -2.955549955368042, |
| "logps/rejected": -3.382478952407837, |
| "loss": 3.1943, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -29.555503845214844, |
| "rewards/margins": 4.269284725189209, |
| "rewards/rejected": -33.82478713989258, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.46366977664687586, |
| "grad_norm": 84.52091519382871, |
| "learning_rate": 5.199887867356143e-07, |
| "logits/chosen": -1.0054672956466675, |
| "logits/rejected": -1.034124493598938, |
| "logps/chosen": -3.2448582649230957, |
| "logps/rejected": -3.7912087440490723, |
| "loss": 2.5166, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -32.44858169555664, |
| "rewards/margins": 5.463507652282715, |
| "rewards/rejected": -37.912086486816406, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.46593158043539723, |
| "grad_norm": 91.18340069666662, |
| "learning_rate": 5.16965498568662e-07, |
| "logits/chosen": -1.0431950092315674, |
| "logits/rejected": -1.0494352579116821, |
| "logps/chosen": -3.452178478240967, |
| "logps/rejected": -4.119001388549805, |
| "loss": 2.7708, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -34.521785736083984, |
| "rewards/margins": 6.668229103088379, |
| "rewards/rejected": -41.19001388549805, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.4681933842239186, |
| "grad_norm": 97.96611032782644, |
| "learning_rate": 5.139348859648098e-07, |
| "logits/chosen": -1.0379126071929932, |
| "logits/rejected": -1.0373729467391968, |
| "logps/chosen": -3.139047145843506, |
| "logps/rejected": -3.602118492126465, |
| "loss": 2.9431, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -31.390474319458008, |
| "rewards/margins": 4.630711555480957, |
| "rewards/rejected": -36.02118682861328, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.4704551880124399, |
| "grad_norm": 105.3352825410185, |
| "learning_rate": 5.10897138702506e-07, |
| "logits/chosen": -0.9758431315422058, |
| "logits/rejected": -0.9999425411224365, |
| "logps/chosen": -3.3104453086853027, |
| "logps/rejected": -3.760322093963623, |
| "loss": 3.6167, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -33.104454040527344, |
| "rewards/margins": 4.4987688064575195, |
| "rewards/rejected": -37.60322189331055, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4727169918009613, |
| "grad_norm": 93.74191045191726, |
| "learning_rate": 5.078524470069743e-07, |
| "logits/chosen": -1.0916061401367188, |
| "logits/rejected": -1.0970686674118042, |
| "logps/chosen": -3.437713384628296, |
| "logps/rejected": -3.9724910259246826, |
| "loss": 2.3738, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -34.377132415771484, |
| "rewards/margins": 5.347775936126709, |
| "rewards/rejected": -39.72490692138672, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.47497879558948264, |
| "grad_norm": 105.04884105691238, |
| "learning_rate": 5.048010015383021e-07, |
| "logits/chosen": -1.010386347770691, |
| "logits/rejected": -1.0093263387680054, |
| "logps/chosen": -3.519493579864502, |
| "logps/rejected": -4.191654205322266, |
| "loss": 2.6885, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -35.19493865966797, |
| "rewards/margins": 6.721607208251953, |
| "rewards/rejected": -41.91654586791992, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.47724059937800395, |
| "grad_norm": 113.27606664327345, |
| "learning_rate": 5.01742993379502e-07, |
| "logits/chosen": -1.0328989028930664, |
| "logits/rejected": -1.050833821296692, |
| "logps/chosen": -3.583611249923706, |
| "logps/rejected": -4.106909275054932, |
| "loss": 2.7627, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -35.836116790771484, |
| "rewards/margins": 5.2329792976379395, |
| "rewards/rejected": -41.069091796875, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.4795024031665253, |
| "grad_norm": 104.82853867162883, |
| "learning_rate": 4.986786140245446e-07, |
| "logits/chosen": -1.0020099878311157, |
| "logits/rejected": -1.0192692279815674, |
| "logps/chosen": -3.448815107345581, |
| "logps/rejected": -3.9523656368255615, |
| "loss": 3.3598, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -34.488155364990234, |
| "rewards/margins": 5.035503387451172, |
| "rewards/rejected": -39.523658752441406, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4817642069550466, |
| "grad_norm": 115.63283025199365, |
| "learning_rate": 4.956080553663687e-07, |
| "logits/chosen": -1.0651870965957642, |
| "logits/rejected": -1.0669838190078735, |
| "logps/chosen": -3.580437183380127, |
| "logps/rejected": -4.1571044921875, |
| "loss": 2.9216, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -35.804378509521484, |
| "rewards/margins": 5.766667366027832, |
| "rewards/rejected": -41.571044921875, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.484026010743568, |
| "grad_norm": 128.35809725836472, |
| "learning_rate": 4.925315096848636e-07, |
| "logits/chosen": -1.0875139236450195, |
| "logits/rejected": -1.1225805282592773, |
| "logps/chosen": -3.7847559452056885, |
| "logps/rejected": -4.454068183898926, |
| "loss": 2.724, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -37.847557067871094, |
| "rewards/margins": 6.693119525909424, |
| "rewards/rejected": -44.54068374633789, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.48628781453208936, |
| "grad_norm": 102.85432329521814, |
| "learning_rate": 4.894491696348293e-07, |
| "logits/chosen": -1.1038923263549805, |
| "logits/rejected": -1.1225206851959229, |
| "logps/chosen": -3.6632027626037598, |
| "logps/rejected": -4.115760803222656, |
| "loss": 3.1612, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -36.63202667236328, |
| "rewards/margins": 4.52557897567749, |
| "rewards/rejected": -41.15760803222656, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.48854961832061067, |
| "grad_norm": 100.23233670472042, |
| "learning_rate": 4.863612282339116e-07, |
| "logits/chosen": -1.0292141437530518, |
| "logits/rejected": -1.0395947694778442, |
| "logps/chosen": -3.849827766418457, |
| "logps/rejected": -4.370044231414795, |
| "loss": 3.3243, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -38.4982795715332, |
| "rewards/margins": 5.202164649963379, |
| "rewards/rejected": -43.700439453125, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.49081142210913203, |
| "grad_norm": 107.28211566319682, |
| "learning_rate": 4.832678788505161e-07, |
| "logits/chosen": -1.070858120918274, |
| "logits/rejected": -1.0871332883834839, |
| "logps/chosen": -3.8025472164154053, |
| "logps/rejected": -4.315918445587158, |
| "loss": 3.5426, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -38.025474548339844, |
| "rewards/margins": 5.133708953857422, |
| "rewards/rejected": -43.1591796875, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4930732258976534, |
| "grad_norm": 114.43034245836103, |
| "learning_rate": 4.801693151916985e-07, |
| "logits/chosen": -1.0845677852630615, |
| "logits/rejected": -1.1089526414871216, |
| "logps/chosen": -3.7524938583374023, |
| "logps/rejected": -4.324914932250977, |
| "loss": 2.9573, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -37.52493667602539, |
| "rewards/margins": 5.724216461181641, |
| "rewards/rejected": -43.24915313720703, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4953350296861747, |
| "grad_norm": 92.78634682717656, |
| "learning_rate": 4.770657312910354e-07, |
| "logits/chosen": -1.1212890148162842, |
| "logits/rejected": -1.1400455236434937, |
| "logps/chosen": -3.78830885887146, |
| "logps/rejected": -4.240726947784424, |
| "loss": 3.3389, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -37.88309097290039, |
| "rewards/margins": 4.524179935455322, |
| "rewards/rejected": -42.40727233886719, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4975968334746961, |
| "grad_norm": 100.92687567592242, |
| "learning_rate": 4.739573214964729e-07, |
| "logits/chosen": -1.0963836908340454, |
| "logits/rejected": -1.109062671661377, |
| "logps/chosen": -3.5693228244781494, |
| "logps/rejected": -4.1547346115112305, |
| "loss": 2.8707, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -35.6932258605957, |
| "rewards/margins": 5.854123115539551, |
| "rewards/rejected": -41.54734802246094, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.49985863726321744, |
| "grad_norm": 114.6619281519479, |
| "learning_rate": 4.7084428045815733e-07, |
| "logits/chosen": -1.0976245403289795, |
| "logits/rejected": -1.1148653030395508, |
| "logps/chosen": -3.7459495067596436, |
| "logps/rejected": -4.185751914978027, |
| "loss": 3.345, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -37.459495544433594, |
| "rewards/margins": 4.39802360534668, |
| "rewards/rejected": -41.85751724243164, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5021204410517388, |
| "grad_norm": 164.15889181575363, |
| "learning_rate": 4.677268031162457e-07, |
| "logits/chosen": -1.1060454845428467, |
| "logits/rejected": -1.1150747537612915, |
| "logps/chosen": -3.5374410152435303, |
| "logps/rejected": -4.003762245178223, |
| "loss": 3.3499, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -35.374412536621094, |
| "rewards/margins": 4.663212299346924, |
| "rewards/rejected": -40.03762435913086, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5043822448402601, |
| "grad_norm": 100.61678360581845, |
| "learning_rate": 4.646050846886985e-07, |
| "logits/chosen": -1.0104235410690308, |
| "logits/rejected": -1.0443775653839111, |
| "logps/chosen": -3.3526723384857178, |
| "logps/rejected": -3.873018503189087, |
| "loss": 2.8564, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -33.5267219543457, |
| "rewards/margins": 5.203460216522217, |
| "rewards/rejected": -38.730186462402344, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5066440486287814, |
| "grad_norm": 114.26897252401342, |
| "learning_rate": 4.6147932065905494e-07, |
| "logits/chosen": -1.0860874652862549, |
| "logits/rejected": -1.0895824432373047, |
| "logps/chosen": -3.5561678409576416, |
| "logps/rejected": -3.9924020767211914, |
| "loss": 3.4319, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -35.561676025390625, |
| "rewards/margins": 4.362340927124023, |
| "rewards/rejected": -39.92401885986328, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5089058524173028, |
| "grad_norm": 106.26494466238337, |
| "learning_rate": 4.5834970676419214e-07, |
| "logits/chosen": -1.0927622318267822, |
| "logits/rejected": -1.1163933277130127, |
| "logps/chosen": -3.4472174644470215, |
| "logps/rejected": -3.9276986122131348, |
| "loss": 3.0706, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -34.472171783447266, |
| "rewards/margins": 4.804813861846924, |
| "rewards/rejected": -39.2769889831543, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5111676562058242, |
| "grad_norm": 142.79046408424182, |
| "learning_rate": 4.552164389820673e-07, |
| "logits/chosen": -1.003208041191101, |
| "logits/rejected": -1.0169744491577148, |
| "logps/chosen": -3.3661296367645264, |
| "logps/rejected": -4.004166126251221, |
| "loss": 2.8901, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -33.661293029785156, |
| "rewards/margins": 6.380366325378418, |
| "rewards/rejected": -40.041664123535156, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5134294599943455, |
| "grad_norm": 113.6822648904385, |
| "learning_rate": 4.5207971351944605e-07, |
| "logits/chosen": -1.133029580116272, |
| "logits/rejected": -1.13167142868042, |
| "logps/chosen": -3.4778950214385986, |
| "logps/rejected": -4.010115146636963, |
| "loss": 3.6064, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -34.778953552246094, |
| "rewards/margins": 5.32219934463501, |
| "rewards/rejected": -40.10114669799805, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5156912637828668, |
| "grad_norm": 107.25373744217654, |
| "learning_rate": 4.489397267996157e-07, |
| "logits/chosen": -1.1207070350646973, |
| "logits/rejected": -1.1274689435958862, |
| "logps/chosen": -3.4271583557128906, |
| "logps/rejected": -3.929474353790283, |
| "loss": 2.9591, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -34.271583557128906, |
| "rewards/margins": 5.023160934448242, |
| "rewards/rejected": -39.294742584228516, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5179530675713881, |
| "grad_norm": 111.56900476327738, |
| "learning_rate": 4.45796675450085e-07, |
| "logits/chosen": -1.0740970373153687, |
| "logits/rejected": -1.0927459001541138, |
| "logps/chosen": -3.397404193878174, |
| "logps/rejected": -3.9328582286834717, |
| "loss": 3.4474, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -33.97404098510742, |
| "rewards/margins": 5.354538917541504, |
| "rewards/rejected": -39.328582763671875, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5202148713599095, |
| "grad_norm": 103.68995444830254, |
| "learning_rate": 4.4265075629027126e-07, |
| "logits/chosen": -1.0486955642700195, |
| "logits/rejected": -1.0680443048477173, |
| "logps/chosen": -3.4485530853271484, |
| "logps/rejected": -3.9430947303771973, |
| "loss": 2.7208, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -34.485530853271484, |
| "rewards/margins": 4.94541597366333, |
| "rewards/rejected": -39.430946350097656, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5224766751484309, |
| "grad_norm": 91.41895983261622, |
| "learning_rate": 4.3950216631917563e-07, |
| "logits/chosen": -1.1048505306243896, |
| "logits/rejected": -1.1197776794433594, |
| "logps/chosen": -3.47996187210083, |
| "logps/rejected": -4.138747692108154, |
| "loss": 2.491, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -34.79962158203125, |
| "rewards/margins": 6.587857723236084, |
| "rewards/rejected": -41.387481689453125, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5247384789369522, |
| "grad_norm": 90.33357685407847, |
| "learning_rate": 4.3635110270304676e-07, |
| "logits/chosen": -1.0903615951538086, |
| "logits/rejected": -1.1053102016448975, |
| "logps/chosen": -3.2950947284698486, |
| "logps/rejected": -3.8697397708892822, |
| "loss": 2.243, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -32.950950622558594, |
| "rewards/margins": 5.746450424194336, |
| "rewards/rejected": -38.6973991394043, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5270002827254736, |
| "grad_norm": 164.72132130018886, |
| "learning_rate": 4.331977627630339e-07, |
| "logits/chosen": -1.052128553390503, |
| "logits/rejected": -1.04896080493927, |
| "logps/chosen": -3.2477636337280273, |
| "logps/rejected": -3.909946918487549, |
| "loss": 2.5147, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -32.47763442993164, |
| "rewards/margins": 6.621834754943848, |
| "rewards/rejected": -39.09946823120117, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5292620865139949, |
| "grad_norm": 105.44016716210716, |
| "learning_rate": 4.300423439628313e-07, |
| "logits/chosen": -1.100780963897705, |
| "logits/rejected": -1.1270376443862915, |
| "logps/chosen": -3.316885232925415, |
| "logps/rejected": -3.9043822288513184, |
| "loss": 2.4694, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -33.168853759765625, |
| "rewards/margins": 5.874972343444824, |
| "rewards/rejected": -39.043827056884766, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5315238903025162, |
| "grad_norm": 102.37005839911242, |
| "learning_rate": 4.268850438963118e-07, |
| "logits/chosen": -1.1160274744033813, |
| "logits/rejected": -1.1523478031158447, |
| "logps/chosen": -3.7024288177490234, |
| "logps/rejected": -4.261665344238281, |
| "loss": 2.5654, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -37.0242919921875, |
| "rewards/margins": 5.5923662185668945, |
| "rewards/rejected": -42.61665725708008, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5337856940910376, |
| "grad_norm": 100.177700252223, |
| "learning_rate": 4.2372606027515463e-07, |
| "logits/chosen": -1.0851325988769531, |
| "logits/rejected": -1.1043192148208618, |
| "logps/chosen": -3.3892626762390137, |
| "logps/rejected": -3.893834114074707, |
| "loss": 3.159, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -33.89262771606445, |
| "rewards/margins": 5.045711994171143, |
| "rewards/rejected": -38.93833923339844, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.536047497879559, |
| "grad_norm": 137.28638250019023, |
| "learning_rate": 4.2056559091646387e-07, |
| "logits/chosen": -1.099539041519165, |
| "logits/rejected": -1.134901523590088, |
| "logps/chosen": -3.5579705238342285, |
| "logps/rejected": -4.083298683166504, |
| "loss": 3.2432, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -35.579708099365234, |
| "rewards/margins": 5.25327730178833, |
| "rewards/rejected": -40.832984924316406, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5383093016680803, |
| "grad_norm": 119.2515847549312, |
| "learning_rate": 4.1740383373038116e-07, |
| "logits/chosen": -1.081524133682251, |
| "logits/rejected": -1.1097904443740845, |
| "logps/chosen": -3.4320178031921387, |
| "logps/rejected": -4.001075744628906, |
| "loss": 2.7358, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -34.32017517089844, |
| "rewards/margins": 5.690584182739258, |
| "rewards/rejected": -40.01075744628906, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5405711054566016, |
| "grad_norm": 106.65847227560742, |
| "learning_rate": 4.1424098670769255e-07, |
| "logits/chosen": -1.1375625133514404, |
| "logits/rejected": -1.1663264036178589, |
| "logps/chosen": -3.55190372467041, |
| "logps/rejected": -3.9685091972351074, |
| "loss": 3.2169, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -35.51903533935547, |
| "rewards/margins": 4.166055679321289, |
| "rewards/rejected": -39.685096740722656, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.542832909245123, |
| "grad_norm": 100.59018953371722, |
| "learning_rate": 4.1107724790743007e-07, |
| "logits/chosen": -1.0572842359542847, |
| "logits/rejected": -1.0986131429672241, |
| "logps/chosen": -3.5302083492279053, |
| "logps/rejected": -4.043272018432617, |
| "loss": 2.5905, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -35.302085876464844, |
| "rewards/margins": 5.130636692047119, |
| "rewards/rejected": -40.43272399902344, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5450947130336443, |
| "grad_norm": 130.8658906195362, |
| "learning_rate": 4.0791281544446947e-07, |
| "logits/chosen": -1.1063083410263062, |
| "logits/rejected": -1.102048635482788, |
| "logps/chosen": -3.599591016769409, |
| "logps/rejected": -4.208369731903076, |
| "loss": 2.4404, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -35.99591064453125, |
| "rewards/margins": 6.087784290313721, |
| "rewards/rejected": -42.08369064331055, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5473565168221657, |
| "grad_norm": 122.85647135040247, |
| "learning_rate": 4.0474788747712416e-07, |
| "logits/chosen": -1.1174023151397705, |
| "logits/rejected": -1.1157442331314087, |
| "logps/chosen": -3.706411838531494, |
| "logps/rejected": -4.156275272369385, |
| "loss": 3.519, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -37.064117431640625, |
| "rewards/margins": 4.498636245727539, |
| "rewards/rejected": -41.56275177001953, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.549618320610687, |
| "grad_norm": 116.9138011882379, |
| "learning_rate": 4.0158266219473573e-07, |
| "logits/chosen": -1.104333519935608, |
| "logits/rejected": -1.1253178119659424, |
| "logps/chosen": -3.5006866455078125, |
| "logps/rejected": -4.054680824279785, |
| "loss": 2.7425, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -35.00686264038086, |
| "rewards/margins": 5.539941787719727, |
| "rewards/rejected": -40.54680633544922, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5518801243992084, |
| "grad_norm": 110.06536774666624, |
| "learning_rate": 3.984173378052643e-07, |
| "logits/chosen": -1.0578244924545288, |
| "logits/rejected": -1.0700592994689941, |
| "logps/chosen": -3.424272060394287, |
| "logps/rejected": -4.020389080047607, |
| "loss": 2.677, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -34.24272155761719, |
| "rewards/margins": 5.961171627044678, |
| "rewards/rejected": -40.203887939453125, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5541419281877297, |
| "grad_norm": 111.08948121570445, |
| "learning_rate": 3.9525211252287585e-07, |
| "logits/chosen": -1.1446008682250977, |
| "logits/rejected": -1.1535121202468872, |
| "logps/chosen": -3.7665810585021973, |
| "logps/rejected": -4.514401912689209, |
| "loss": 2.6081, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -37.665809631347656, |
| "rewards/margins": 7.478209018707275, |
| "rewards/rejected": -45.144020080566406, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.556403731976251, |
| "grad_norm": 107.23381454459822, |
| "learning_rate": 3.920871845555305e-07, |
| "logits/chosen": -1.0748775005340576, |
| "logits/rejected": -1.0747300386428833, |
| "logps/chosen": -3.663707733154297, |
| "logps/rejected": -4.138020038604736, |
| "loss": 2.7566, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -36.63707733154297, |
| "rewards/margins": 4.743118762969971, |
| "rewards/rejected": -41.38019943237305, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5586655357647724, |
| "grad_norm": 137.08053930045634, |
| "learning_rate": 3.8892275209256984e-07, |
| "logits/chosen": -1.1169809103012085, |
| "logits/rejected": -1.124050259590149, |
| "logps/chosen": -3.999643564224243, |
| "logps/rejected": -4.464410305023193, |
| "loss": 3.1658, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -39.996437072753906, |
| "rewards/margins": 4.64766263961792, |
| "rewards/rejected": -44.644100189208984, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5609273395532938, |
| "grad_norm": 132.83729250085113, |
| "learning_rate": 3.8575901329230747e-07, |
| "logits/chosen": -1.0912463665008545, |
| "logits/rejected": -1.091078281402588, |
| "logps/chosen": -3.834798812866211, |
| "logps/rejected": -4.405033111572266, |
| "loss": 2.8859, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -38.347991943359375, |
| "rewards/margins": 5.7023420333862305, |
| "rewards/rejected": -44.050331115722656, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5631891433418151, |
| "grad_norm": 133.00485994320712, |
| "learning_rate": 3.8259616626961886e-07, |
| "logits/chosen": -1.0844348669052124, |
| "logits/rejected": -1.0968797206878662, |
| "logps/chosen": -3.6328611373901367, |
| "logps/rejected": -4.086342811584473, |
| "loss": 2.5866, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -36.328609466552734, |
| "rewards/margins": 4.534820079803467, |
| "rewards/rejected": -40.863426208496094, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5654509471303364, |
| "grad_norm": 111.79119137868418, |
| "learning_rate": 3.794344090835362e-07, |
| "logits/chosen": -1.1024246215820312, |
| "logits/rejected": -1.1074460744857788, |
| "logps/chosen": -3.958535671234131, |
| "logps/rejected": -4.475435733795166, |
| "loss": 3.247, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -39.58535385131836, |
| "rewards/margins": 5.169003009796143, |
| "rewards/rejected": -44.754356384277344, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5677127509188578, |
| "grad_norm": 135.70711423835718, |
| "learning_rate": 3.7627393972484534e-07, |
| "logits/chosen": -1.1514368057250977, |
| "logits/rejected": -1.1660606861114502, |
| "logps/chosen": -3.9443583488464355, |
| "logps/rejected": -4.3965229988098145, |
| "loss": 3.3998, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -39.44358825683594, |
| "rewards/margins": 4.5216474533081055, |
| "rewards/rejected": -43.965232849121094, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5699745547073791, |
| "grad_norm": 122.14532370258085, |
| "learning_rate": 3.7311495610368823e-07, |
| "logits/chosen": -1.1626390218734741, |
| "logits/rejected": -1.1832255125045776, |
| "logps/chosen": -4.036317348480225, |
| "logps/rejected": -4.556058883666992, |
| "loss": 3.1401, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -40.36316680908203, |
| "rewards/margins": 5.1974196434021, |
| "rewards/rejected": -45.56059265136719, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5722363584959005, |
| "grad_norm": 102.26046335206325, |
| "learning_rate": 3.699576560371689e-07, |
| "logits/chosen": -1.0833961963653564, |
| "logits/rejected": -1.1061809062957764, |
| "logps/chosen": -3.965580463409424, |
| "logps/rejected": -4.775724411010742, |
| "loss": 2.0592, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -39.65580749511719, |
| "rewards/margins": 8.1014404296875, |
| "rewards/rejected": -47.75724792480469, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5744981622844219, |
| "grad_norm": 122.67818060364495, |
| "learning_rate": 3.66802237236966e-07, |
| "logits/chosen": -1.1018187999725342, |
| "logits/rejected": -1.1187875270843506, |
| "logps/chosen": -3.992328643798828, |
| "logps/rejected": -4.545815944671631, |
| "loss": 2.8386, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -39.92329025268555, |
| "rewards/margins": 5.5348687171936035, |
| "rewards/rejected": -45.458160400390625, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5767599660729432, |
| "grad_norm": 118.62094863063925, |
| "learning_rate": 3.636488972969532e-07, |
| "logits/chosen": -1.0958614349365234, |
| "logits/rejected": -1.1104106903076172, |
| "logps/chosen": -3.850203514099121, |
| "logps/rejected": -4.422935485839844, |
| "loss": 2.8001, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -38.50203323364258, |
| "rewards/margins": 5.727315425872803, |
| "rewards/rejected": -44.22935104370117, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5790217698614645, |
| "grad_norm": 120.96931790013662, |
| "learning_rate": 3.604978336808244e-07, |
| "logits/chosen": -1.194389820098877, |
| "logits/rejected": -1.201671838760376, |
| "logps/chosen": -3.8203506469726562, |
| "logps/rejected": -4.354315280914307, |
| "loss": 2.8406, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -38.2035026550293, |
| "rewards/margins": 5.339643955230713, |
| "rewards/rejected": -43.54315185546875, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5812835736499858, |
| "grad_norm": 108.82242695089444, |
| "learning_rate": 3.5734924370972876e-07, |
| "logits/chosen": -1.1173956394195557, |
| "logits/rejected": -1.1346094608306885, |
| "logps/chosen": -3.774829149246216, |
| "logps/rejected": -4.319247245788574, |
| "loss": 2.9131, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -37.74829864501953, |
| "rewards/margins": 5.444177627563477, |
| "rewards/rejected": -43.192474365234375, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5835453774385072, |
| "grad_norm": 123.4677107906652, |
| "learning_rate": 3.5420332454991504e-07, |
| "logits/chosen": -1.0675257444381714, |
| "logits/rejected": -1.0911375284194946, |
| "logps/chosen": -3.746617078781128, |
| "logps/rejected": -4.322977066040039, |
| "loss": 2.8697, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -37.46617126464844, |
| "rewards/margins": 5.76359748840332, |
| "rewards/rejected": -43.22977066040039, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5858071812270286, |
| "grad_norm": 105.5976688373798, |
| "learning_rate": 3.510602732003843e-07, |
| "logits/chosen": -1.1454898118972778, |
| "logits/rejected": -1.1573333740234375, |
| "logps/chosen": -3.9214420318603516, |
| "logps/rejected": -4.544137001037598, |
| "loss": 2.6906, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -39.214420318603516, |
| "rewards/margins": 6.226948261260986, |
| "rewards/rejected": -45.441368103027344, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5880689850155499, |
| "grad_norm": 113.87298735956101, |
| "learning_rate": 3.4792028648055396e-07, |
| "logits/chosen": -1.0981009006500244, |
| "logits/rejected": -1.1229112148284912, |
| "logps/chosen": -3.649538040161133, |
| "logps/rejected": -4.314185619354248, |
| "loss": 2.535, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -36.49538040161133, |
| "rewards/margins": 6.646478176116943, |
| "rewards/rejected": -43.14185333251953, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5903307888040712, |
| "grad_norm": 116.11587475053403, |
| "learning_rate": 3.447835610179327e-07, |
| "logits/chosen": -1.0831830501556396, |
| "logits/rejected": -1.100474238395691, |
| "logps/chosen": -3.8131837844848633, |
| "logps/rejected": -4.552441120147705, |
| "loss": 2.6365, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -38.131832122802734, |
| "rewards/margins": 7.392579555511475, |
| "rewards/rejected": -45.5244140625, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 134.3566971386809, |
| "learning_rate": 3.416502932358079e-07, |
| "logits/chosen": -1.1478521823883057, |
| "logits/rejected": -1.1568520069122314, |
| "logps/chosen": -4.000604152679443, |
| "logps/rejected": -4.428308486938477, |
| "loss": 3.1547, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -40.00604248046875, |
| "rewards/margins": 4.277041435241699, |
| "rewards/rejected": -44.2830810546875, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5948543963811139, |
| "grad_norm": 135.07800674537654, |
| "learning_rate": 3.385206793409451e-07, |
| "logits/chosen": -1.0648393630981445, |
| "logits/rejected": -1.0934747457504272, |
| "logps/chosen": -3.4633100032806396, |
| "logps/rejected": -3.9857139587402344, |
| "loss": 2.8446, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -34.63310241699219, |
| "rewards/margins": 5.2240400314331055, |
| "rewards/rejected": -39.857139587402344, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5971162001696353, |
| "grad_norm": 135.56690119641354, |
| "learning_rate": 3.3539491531130163e-07, |
| "logits/chosen": -1.1003735065460205, |
| "logits/rejected": -1.1152950525283813, |
| "logps/chosen": -3.834996223449707, |
| "logps/rejected": -4.56181526184082, |
| "loss": 2.74, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -38.3499641418457, |
| "rewards/margins": 7.268193244934082, |
| "rewards/rejected": -45.61815643310547, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5993780039581567, |
| "grad_norm": 126.5260644122031, |
| "learning_rate": 3.3227319688375426e-07, |
| "logits/chosen": -1.1725904941558838, |
| "logits/rejected": -1.1713595390319824, |
| "logps/chosen": -3.9160027503967285, |
| "logps/rejected": -4.517629623413086, |
| "loss": 2.4535, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -39.16002655029297, |
| "rewards/margins": 6.016266345977783, |
| "rewards/rejected": -45.176292419433594, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.601639807746678, |
| "grad_norm": 110.87232953520362, |
| "learning_rate": 3.291557195418427e-07, |
| "logits/chosen": -1.169702410697937, |
| "logits/rejected": -1.1751315593719482, |
| "logps/chosen": -3.7779271602630615, |
| "logps/rejected": -4.331457614898682, |
| "loss": 2.7261, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -37.779273986816406, |
| "rewards/margins": 5.53530740737915, |
| "rewards/rejected": -43.314571380615234, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6039016115351993, |
| "grad_norm": 170.10518211676956, |
| "learning_rate": 3.260426785035272e-07, |
| "logits/chosen": -1.1354193687438965, |
| "logits/rejected": -1.1495718955993652, |
| "logps/chosen": -3.8486831188201904, |
| "logps/rejected": -4.348843097686768, |
| "loss": 3.5745, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -38.4868278503418, |
| "rewards/margins": 5.001603126525879, |
| "rewards/rejected": -43.48843002319336, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.6061634153237206, |
| "grad_norm": 116.44936706887347, |
| "learning_rate": 3.229342687089646e-07, |
| "logits/chosen": -1.140492558479309, |
| "logits/rejected": -1.1480599641799927, |
| "logps/chosen": -3.7502431869506836, |
| "logps/rejected": -4.402010440826416, |
| "loss": 2.6712, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -37.502437591552734, |
| "rewards/margins": 6.517667770385742, |
| "rewards/rejected": -44.02009963989258, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.608425219112242, |
| "grad_norm": 126.74661926416681, |
| "learning_rate": 3.1983068480830143e-07, |
| "logits/chosen": -1.1415023803710938, |
| "logits/rejected": -1.159563422203064, |
| "logps/chosen": -3.7056891918182373, |
| "logps/rejected": -4.368051528930664, |
| "loss": 2.4807, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -37.05690002441406, |
| "rewards/margins": 6.6236186027526855, |
| "rewards/rejected": -43.68051528930664, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6106870229007634, |
| "grad_norm": 112.01190173093258, |
| "learning_rate": 3.1673212114948387e-07, |
| "logits/chosen": -1.1376936435699463, |
| "logits/rejected": -1.1491377353668213, |
| "logps/chosen": -3.693352699279785, |
| "logps/rejected": -4.348179340362549, |
| "loss": 2.362, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -36.93352508544922, |
| "rewards/margins": 6.548270225524902, |
| "rewards/rejected": -43.48179244995117, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6129488266892847, |
| "grad_norm": 131.75988503177302, |
| "learning_rate": 3.1363877176608845e-07, |
| "logits/chosen": -1.106995940208435, |
| "logits/rejected": -1.1330175399780273, |
| "logps/chosen": -3.4859375953674316, |
| "logps/rejected": -4.093271732330322, |
| "loss": 2.5697, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -34.859375, |
| "rewards/margins": 6.073338508605957, |
| "rewards/rejected": -40.932716369628906, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.615210630477806, |
| "grad_norm": 131.91725099894813, |
| "learning_rate": 3.1055083036517076e-07, |
| "logits/chosen": -1.116554617881775, |
| "logits/rejected": -1.1147369146347046, |
| "logps/chosen": -3.5844948291778564, |
| "logps/rejected": -4.161045074462891, |
| "loss": 2.8339, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -35.844947814941406, |
| "rewards/margins": 5.765503883361816, |
| "rewards/rejected": -41.610450744628906, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6174724342663274, |
| "grad_norm": 150.42025749523202, |
| "learning_rate": 3.074684903151364e-07, |
| "logits/chosen": -1.0365394353866577, |
| "logits/rejected": -1.0323246717453003, |
| "logps/chosen": -3.274428129196167, |
| "logps/rejected": -3.7612733840942383, |
| "loss": 2.5272, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -32.744285583496094, |
| "rewards/margins": 4.868447303771973, |
| "rewards/rejected": -37.612728118896484, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6197342380548487, |
| "grad_norm": 136.11200767678002, |
| "learning_rate": 3.0439194463363136e-07, |
| "logits/chosen": -1.0989680290222168, |
| "logits/rejected": -1.1113637685775757, |
| "logps/chosen": -3.509467840194702, |
| "logps/rejected": -4.074167728424072, |
| "loss": 2.6865, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -35.09468078613281, |
| "rewards/margins": 5.646995544433594, |
| "rewards/rejected": -40.741676330566406, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.6219960418433701, |
| "grad_norm": 127.44459660902798, |
| "learning_rate": 3.0132138597545537e-07, |
| "logits/chosen": -1.1420754194259644, |
| "logits/rejected": -1.1679383516311646, |
| "logps/chosen": -3.78161358833313, |
| "logps/rejected": -4.458674430847168, |
| "loss": 2.5574, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -37.81613540649414, |
| "rewards/margins": 6.7706098556518555, |
| "rewards/rejected": -44.58674240112305, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6242578456318915, |
| "grad_norm": 106.31811579148187, |
| "learning_rate": 2.982570066204981e-07, |
| "logits/chosen": -1.140415906906128, |
| "logits/rejected": -1.1640311479568481, |
| "logps/chosen": -3.6432271003723145, |
| "logps/rejected": -4.16619348526001, |
| "loss": 2.6839, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -36.43227005004883, |
| "rewards/margins": 5.229666233062744, |
| "rewards/rejected": -41.66193389892578, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.6265196494204128, |
| "grad_norm": 136.57760766398937, |
| "learning_rate": 2.951989984616979e-07, |
| "logits/chosen": -1.073444128036499, |
| "logits/rejected": -1.1000826358795166, |
| "logps/chosen": -3.8470325469970703, |
| "logps/rejected": -4.511543273925781, |
| "loss": 3.2317, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -38.4703254699707, |
| "rewards/margins": 6.645102024078369, |
| "rewards/rejected": -45.11542892456055, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6287814532089341, |
| "grad_norm": 124.12928037170991, |
| "learning_rate": 2.9214755299302584e-07, |
| "logits/chosen": -1.0841182470321655, |
| "logits/rejected": -1.1026504039764404, |
| "logps/chosen": -4.121213436126709, |
| "logps/rejected": -4.8484954833984375, |
| "loss": 2.2503, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -41.212135314941406, |
| "rewards/margins": 7.272819519042969, |
| "rewards/rejected": -48.484954833984375, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.6310432569974554, |
| "grad_norm": 153.94308352336938, |
| "learning_rate": 2.89102861297494e-07, |
| "logits/chosen": -1.151421308517456, |
| "logits/rejected": -1.1864724159240723, |
| "logps/chosen": -3.9869375228881836, |
| "logps/rejected": -4.583745956420898, |
| "loss": 3.0198, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -39.86937713623047, |
| "rewards/margins": 5.968080997467041, |
| "rewards/rejected": -45.83745574951172, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.6333050607859768, |
| "grad_norm": 130.631038930031, |
| "learning_rate": 2.860651140351902e-07, |
| "logits/chosen": -1.1522353887557983, |
| "logits/rejected": -1.1588143110275269, |
| "logps/chosen": -4.093480587005615, |
| "logps/rejected": -4.765751838684082, |
| "loss": 2.5793, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -40.93480682373047, |
| "rewards/margins": 6.7227091789245605, |
| "rewards/rejected": -47.65751647949219, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6355668645744982, |
| "grad_norm": 128.14259196954697, |
| "learning_rate": 2.830345014313381e-07, |
| "logits/chosen": -1.0641752481460571, |
| "logits/rejected": -1.0930891036987305, |
| "logps/chosen": -4.28399658203125, |
| "logps/rejected": -4.995797634124756, |
| "loss": 2.376, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -42.83997344970703, |
| "rewards/margins": 7.118013381958008, |
| "rewards/rejected": -49.95798110961914, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6378286683630195, |
| "grad_norm": 131.35675571796293, |
| "learning_rate": 2.800112132643856e-07, |
| "logits/chosen": -1.147114634513855, |
| "logits/rejected": -1.1579588651657104, |
| "logps/chosen": -4.231831073760986, |
| "logps/rejected": -4.9878106117248535, |
| "loss": 2.3918, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -42.31830978393555, |
| "rewards/margins": 7.559793472290039, |
| "rewards/rejected": -49.87810516357422, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6400904721515408, |
| "grad_norm": 140.95255800161883, |
| "learning_rate": 2.7699543885412105e-07, |
| "logits/chosen": -1.1515127420425415, |
| "logits/rejected": -1.154524564743042, |
| "logps/chosen": -4.58392333984375, |
| "logps/rejected": -5.317344665527344, |
| "loss": 2.6597, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -45.8392333984375, |
| "rewards/margins": 7.334213733673096, |
| "rewards/rejected": -53.17344665527344, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6423522759400622, |
| "grad_norm": 129.8600206614132, |
| "learning_rate": 2.7398736704981725e-07, |
| "logits/chosen": -1.150620460510254, |
| "logits/rejected": -1.1516865491867065, |
| "logps/chosen": -4.295502185821533, |
| "logps/rejected": -4.943012714385986, |
| "loss": 2.3597, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -42.95501708984375, |
| "rewards/margins": 6.475111484527588, |
| "rewards/rejected": -49.43013000488281, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6446140797285835, |
| "grad_norm": 177.10199379210934, |
| "learning_rate": 2.709871862184063e-07, |
| "logits/chosen": -1.1259461641311646, |
| "logits/rejected": -1.1398793458938599, |
| "logps/chosen": -4.510852813720703, |
| "logps/rejected": -5.170764446258545, |
| "loss": 2.795, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -45.10852813720703, |
| "rewards/margins": 6.599118232727051, |
| "rewards/rejected": -51.7076416015625, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6468758835171049, |
| "grad_norm": 129.64216363994416, |
| "learning_rate": 2.679950842326837e-07, |
| "logits/chosen": -1.1655973196029663, |
| "logits/rejected": -1.1758906841278076, |
| "logps/chosen": -4.58130407333374, |
| "logps/rejected": -5.327126502990723, |
| "loss": 2.3903, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -45.81304168701172, |
| "rewards/margins": 7.45822286605835, |
| "rewards/rejected": -53.271263122558594, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6491376873056263, |
| "grad_norm": 154.94721926801571, |
| "learning_rate": 2.6501124845954363e-07, |
| "logits/chosen": -1.1170488595962524, |
| "logits/rejected": -1.1338824033737183, |
| "logps/chosen": -4.708388328552246, |
| "logps/rejected": -5.483621597290039, |
| "loss": 2.3088, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -47.08388137817383, |
| "rewards/margins": 7.752330780029297, |
| "rewards/rejected": -54.836212158203125, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6513994910941476, |
| "grad_norm": 137.88543689039244, |
| "learning_rate": 2.62035865748246e-07, |
| "logits/chosen": -1.102371096611023, |
| "logits/rejected": -1.1110731363296509, |
| "logps/chosen": -4.388948917388916, |
| "logps/rejected": -5.055350303649902, |
| "loss": 2.7012, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -43.88949203491211, |
| "rewards/margins": 6.664010047912598, |
| "rewards/rejected": -50.55350112915039, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6536612948826689, |
| "grad_norm": 185.23078201443082, |
| "learning_rate": 2.5906912241871554e-07, |
| "logits/chosen": -1.1915696859359741, |
| "logits/rejected": -1.192091941833496, |
| "logps/chosen": -4.680731773376465, |
| "logps/rejected": -5.389235496520996, |
| "loss": 2.7165, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -46.807315826416016, |
| "rewards/margins": 7.085034370422363, |
| "rewards/rejected": -53.892356872558594, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6559230986711903, |
| "grad_norm": 158.84111035426994, |
| "learning_rate": 2.561112042498753e-07, |
| "logits/chosen": -1.0850210189819336, |
| "logits/rejected": -1.1100788116455078, |
| "logps/chosen": -4.235768795013428, |
| "logps/rejected": -4.804391384124756, |
| "loss": 3.4646, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -42.35768508911133, |
| "rewards/margins": 5.686228275299072, |
| "rewards/rejected": -48.04391098022461, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6581849024597116, |
| "grad_norm": 169.51933352380985, |
| "learning_rate": 2.5316229646801195e-07, |
| "logits/chosen": -1.1047579050064087, |
| "logits/rejected": -1.1353652477264404, |
| "logps/chosen": -4.523907661437988, |
| "logps/rejected": -5.176398754119873, |
| "loss": 2.6109, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -45.23907470703125, |
| "rewards/margins": 6.524911880493164, |
| "rewards/rejected": -51.76398849487305, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.660446706248233, |
| "grad_norm": 173.65393153021836, |
| "learning_rate": 2.5022258373517714e-07, |
| "logits/chosen": -1.190868616104126, |
| "logits/rejected": -1.1945364475250244, |
| "logps/chosen": -4.056920051574707, |
| "logps/rejected": -4.662143707275391, |
| "loss": 2.5368, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -40.56919860839844, |
| "rewards/margins": 6.052234649658203, |
| "rewards/rejected": -46.62143325805664, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6627085100367544, |
| "grad_norm": 142.8629393695611, |
| "learning_rate": 2.4729225013762474e-07, |
| "logits/chosen": -1.21455717086792, |
| "logits/rejected": -1.2325047254562378, |
| "logps/chosen": -4.186022758483887, |
| "logps/rejected": -4.799120903015137, |
| "loss": 3.1403, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -41.86022186279297, |
| "rewards/margins": 6.130983829498291, |
| "rewards/rejected": -47.99120330810547, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.6649703138252756, |
| "grad_norm": 135.41361240770718, |
| "learning_rate": 2.4437147917428203e-07, |
| "logits/chosen": -1.1105806827545166, |
| "logits/rejected": -1.129175066947937, |
| "logps/chosen": -3.8398869037628174, |
| "logps/rejected": -4.574884414672852, |
| "loss": 2.5425, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -38.398868560791016, |
| "rewards/margins": 7.349975109100342, |
| "rewards/rejected": -45.74884796142578, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.667232117613797, |
| "grad_norm": 168.11239113479994, |
| "learning_rate": 2.414604537452595e-07, |
| "logits/chosen": -1.1045446395874023, |
| "logits/rejected": -1.1168041229248047, |
| "logps/chosen": -3.7975552082061768, |
| "logps/rejected": -4.3420491218566895, |
| "loss": 2.7938, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -37.975547790527344, |
| "rewards/margins": 5.44494104385376, |
| "rewards/rejected": -43.420494079589844, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6694939214023183, |
| "grad_norm": 121.89542687766853, |
| "learning_rate": 2.385593561403974e-07, |
| "logits/chosen": -1.1402511596679688, |
| "logits/rejected": -1.1568533182144165, |
| "logps/chosen": -3.708047866821289, |
| "logps/rejected": -4.3688859939575195, |
| "loss": 2.3406, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -37.08047866821289, |
| "rewards/margins": 6.60837984085083, |
| "rewards/rejected": -43.68885803222656, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6717557251908397, |
| "grad_norm": 111.82954104467309, |
| "learning_rate": 2.3566836802785119e-07, |
| "logits/chosen": -1.136359453201294, |
| "logits/rejected": -1.145086407661438, |
| "logps/chosen": -3.72176456451416, |
| "logps/rejected": -4.468777656555176, |
| "loss": 2.2578, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -37.2176513671875, |
| "rewards/margins": 7.470129013061523, |
| "rewards/rejected": -44.687774658203125, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6740175289793611, |
| "grad_norm": 114.98069010016692, |
| "learning_rate": 2.327876704427146e-07, |
| "logits/chosen": -1.098710060119629, |
| "logits/rejected": -1.1092326641082764, |
| "logps/chosen": -3.6272008419036865, |
| "logps/rejected": -4.156996726989746, |
| "loss": 2.8945, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -36.27201461791992, |
| "rewards/margins": 5.2979583740234375, |
| "rewards/rejected": -41.569969177246094, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6762793327678824, |
| "grad_norm": 181.53426849775465, |
| "learning_rate": 2.2991744377568358e-07, |
| "logits/chosen": -1.1007070541381836, |
| "logits/rejected": -1.1016566753387451, |
| "logps/chosen": -3.71644926071167, |
| "logps/rejected": -4.26182746887207, |
| "loss": 2.773, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -37.164493560791016, |
| "rewards/margins": 5.45378303527832, |
| "rewards/rejected": -42.61827850341797, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6785411365564037, |
| "grad_norm": 147.43695074107663, |
| "learning_rate": 2.270578677617601e-07, |
| "logits/chosen": -1.1499643325805664, |
| "logits/rejected": -1.1593643426895142, |
| "logps/chosen": -3.7320542335510254, |
| "logps/rejected": -4.402863502502441, |
| "loss": 3.127, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -37.32053756713867, |
| "rewards/margins": 6.708094596862793, |
| "rewards/rejected": -44.02863693237305, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6808029403449251, |
| "grad_norm": 128.47706273777575, |
| "learning_rate": 2.242091214689971e-07, |
| "logits/chosen": -1.119197130203247, |
| "logits/rejected": -1.154642105102539, |
| "logps/chosen": -3.8752312660217285, |
| "logps/rejected": -4.5948710441589355, |
| "loss": 2.5358, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -38.75231170654297, |
| "rewards/margins": 7.196399211883545, |
| "rewards/rejected": -45.94871139526367, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.6830647441334464, |
| "grad_norm": 156.39742683192418, |
| "learning_rate": 2.2137138328728456e-07, |
| "logits/chosen": -1.1946172714233398, |
| "logits/rejected": -1.1830028295516968, |
| "logps/chosen": -3.916076183319092, |
| "logps/rejected": -4.4446516036987305, |
| "loss": 2.484, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -39.16075897216797, |
| "rewards/margins": 5.285754680633545, |
| "rewards/rejected": -44.44651412963867, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6853265479219678, |
| "grad_norm": 112.74171061703916, |
| "learning_rate": 2.1854483091717974e-07, |
| "logits/chosen": -1.1835260391235352, |
| "logits/rejected": -1.2037560939788818, |
| "logps/chosen": -3.8349528312683105, |
| "logps/rejected": -4.525196075439453, |
| "loss": 1.9858, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -38.34953308105469, |
| "rewards/margins": 6.902431964874268, |
| "rewards/rejected": -45.25196075439453, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6875883517104892, |
| "grad_norm": 138.95665180597908, |
| "learning_rate": 2.1572964135877863e-07, |
| "logits/chosen": -1.158783197402954, |
| "logits/rejected": -1.1802828311920166, |
| "logps/chosen": -4.0684814453125, |
| "logps/rejected": -4.600527286529541, |
| "loss": 2.9658, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -40.684814453125, |
| "rewards/margins": 5.320462226867676, |
| "rewards/rejected": -46.00527572631836, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6898501554990104, |
| "grad_norm": 129.34978151860037, |
| "learning_rate": 2.1292599090063245e-07, |
| "logits/chosen": -1.1776607036590576, |
| "logits/rejected": -1.1886625289916992, |
| "logps/chosen": -4.103504180908203, |
| "logps/rejected": -4.913661479949951, |
| "loss": 2.0852, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -41.03504180908203, |
| "rewards/margins": 8.101574897766113, |
| "rewards/rejected": -49.136619567871094, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6921119592875318, |
| "grad_norm": 146.9792159571898, |
| "learning_rate": 2.1013405510870824e-07, |
| "logits/chosen": -1.1055035591125488, |
| "logits/rejected": -1.137697458267212, |
| "logps/chosen": -4.204385757446289, |
| "logps/rejected": -4.979820728302002, |
| "loss": 2.2835, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -42.043861389160156, |
| "rewards/margins": 7.7543511390686035, |
| "rewards/rejected": -49.79821014404297, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6943737630760531, |
| "grad_norm": 131.64245125711065, |
| "learning_rate": 2.0735400881539494e-07, |
| "logits/chosen": -1.1195533275604248, |
| "logits/rejected": -1.1339137554168701, |
| "logps/chosen": -4.579202651977539, |
| "logps/rejected": -5.318291664123535, |
| "loss": 2.2426, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -45.792022705078125, |
| "rewards/margins": 7.390895366668701, |
| "rewards/rejected": -53.182918548583984, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6966355668645745, |
| "grad_norm": 152.58870740768677, |
| "learning_rate": 2.0458602610855536e-07, |
| "logits/chosen": -1.2092702388763428, |
| "logits/rejected": -1.2151172161102295, |
| "logps/chosen": -4.504486560821533, |
| "logps/rejected": -5.164217472076416, |
| "loss": 2.5257, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -45.044864654541016, |
| "rewards/margins": 6.59730863571167, |
| "rewards/rejected": -51.642173767089844, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6988973706530959, |
| "grad_norm": 165.81164088683033, |
| "learning_rate": 2.0183028032062422e-07, |
| "logits/chosen": -1.1569883823394775, |
| "logits/rejected": -1.1767610311508179, |
| "logps/chosen": -4.616701126098633, |
| "logps/rejected": -5.350695610046387, |
| "loss": 2.8387, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -46.16701126098633, |
| "rewards/margins": 7.339938640594482, |
| "rewards/rejected": -53.50695037841797, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7011591744416172, |
| "grad_norm": 140.2670171009395, |
| "learning_rate": 1.9908694401775473e-07, |
| "logits/chosen": -1.186819076538086, |
| "logits/rejected": -1.191609263420105, |
| "logps/chosen": -4.841033935546875, |
| "logps/rejected": -5.632945537567139, |
| "loss": 2.6282, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -48.41033935546875, |
| "rewards/margins": 7.919118881225586, |
| "rewards/rejected": -56.32946014404297, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7034209782301385, |
| "grad_norm": 177.6691174160963, |
| "learning_rate": 1.9635618898901196e-07, |
| "logits/chosen": -1.1530416011810303, |
| "logits/rejected": -1.1640995740890503, |
| "logps/chosen": -5.184268951416016, |
| "logps/rejected": -6.004877090454102, |
| "loss": 2.7581, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -51.842689514160156, |
| "rewards/margins": 8.20608139038086, |
| "rewards/rejected": -60.04876708984375, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7056827820186599, |
| "grad_norm": 148.70727647820473, |
| "learning_rate": 1.9363818623561565e-07, |
| "logits/chosen": -1.1177144050598145, |
| "logits/rejected": -1.1454871892929077, |
| "logps/chosen": -4.92880392074585, |
| "logps/rejected": -5.721675395965576, |
| "loss": 2.7453, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -49.28804016113281, |
| "rewards/margins": 7.928720951080322, |
| "rewards/rejected": -57.21676254272461, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.7079445858071812, |
| "grad_norm": 132.10892999630366, |
| "learning_rate": 1.9093310596023108e-07, |
| "logits/chosen": -1.0975664854049683, |
| "logits/rejected": -1.1099908351898193, |
| "logps/chosen": -4.774985313415527, |
| "logps/rejected": -5.697840213775635, |
| "loss": 2.0977, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -47.749847412109375, |
| "rewards/margins": 9.228551864624023, |
| "rewards/rejected": -56.97840118408203, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.7102063895957026, |
| "grad_norm": 143.64663603150058, |
| "learning_rate": 1.8824111755631274e-07, |
| "logits/chosen": -1.1822034120559692, |
| "logits/rejected": -1.1960346698760986, |
| "logps/chosen": -4.618733882904053, |
| "logps/rejected": -5.318885803222656, |
| "loss": 2.4163, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -46.187339782714844, |
| "rewards/margins": 7.0015177726745605, |
| "rewards/rejected": -53.18885040283203, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.712468193384224, |
| "grad_norm": 209.87932133223504, |
| "learning_rate": 1.8556238959749457e-07, |
| "logits/chosen": -1.1291964054107666, |
| "logits/rejected": -1.1407198905944824, |
| "logps/chosen": -5.23246955871582, |
| "logps/rejected": -5.747961044311523, |
| "loss": 3.631, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -52.324703216552734, |
| "rewards/margins": 5.154911518096924, |
| "rewards/rejected": -57.4796142578125, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.7147299971727452, |
| "grad_norm": 231.18280960384436, |
| "learning_rate": 1.8289708982703562e-07, |
| "logits/chosen": -1.1179161071777344, |
| "logits/rejected": -1.1054469347000122, |
| "logps/chosen": -4.88719367980957, |
| "logps/rejected": -5.661530494689941, |
| "loss": 3.2037, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -48.87194061279297, |
| "rewards/margins": 7.743368148803711, |
| "rewards/rejected": -56.61531066894531, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.7169918009612666, |
| "grad_norm": 166.68538687840427, |
| "learning_rate": 1.802453851473151e-07, |
| "logits/chosen": -1.1730633974075317, |
| "logits/rejected": -1.1686493158340454, |
| "logps/chosen": -4.938043117523193, |
| "logps/rejected": -5.677487373352051, |
| "loss": 2.6408, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -49.38043212890625, |
| "rewards/margins": 7.394440650939941, |
| "rewards/rejected": -56.774871826171875, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.719253604749788, |
| "grad_norm": 150.32828613492202, |
| "learning_rate": 1.7760744160938093e-07, |
| "logits/chosen": -1.1172575950622559, |
| "logits/rejected": -1.127094030380249, |
| "logps/chosen": -4.671790599822998, |
| "logps/rejected": -5.620820045471191, |
| "loss": 2.3808, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -46.71790313720703, |
| "rewards/margins": 9.490296363830566, |
| "rewards/rejected": -56.20820617675781, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.7215154085383093, |
| "grad_norm": 139.98556421446793, |
| "learning_rate": 1.7498342440255135e-07, |
| "logits/chosen": -1.162559151649475, |
| "logits/rejected": -1.1655352115631104, |
| "logps/chosen": -4.815513610839844, |
| "logps/rejected": -5.493526935577393, |
| "loss": 2.6548, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -48.1551399230957, |
| "rewards/margins": 6.780129909515381, |
| "rewards/rejected": -54.935272216796875, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.7237772123268307, |
| "grad_norm": 148.0972095206849, |
| "learning_rate": 1.7237349784407115e-07, |
| "logits/chosen": -1.1615474224090576, |
| "logits/rejected": -1.1631369590759277, |
| "logps/chosen": -4.868170261383057, |
| "logps/rejected": -5.621500015258789, |
| "loss": 2.4636, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -48.68170166015625, |
| "rewards/margins": 7.533293724060059, |
| "rewards/rejected": -56.214996337890625, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.726039016115352, |
| "grad_norm": 140.71046770828303, |
| "learning_rate": 1.6977782536882178e-07, |
| "logits/chosen": -1.0695641040802002, |
| "logits/rejected": -1.0845947265625, |
| "logps/chosen": -4.2239885330200195, |
| "logps/rejected": -5.09049654006958, |
| "loss": 2.4795, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -42.23988342285156, |
| "rewards/margins": 8.665081977844238, |
| "rewards/rejected": -50.904964447021484, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.7283008199038733, |
| "grad_norm": 141.34556612654956, |
| "learning_rate": 1.6719656951908708e-07, |
| "logits/chosen": -1.0963406562805176, |
| "logits/rejected": -1.115116834640503, |
| "logps/chosen": -4.027679443359375, |
| "logps/rejected": -4.841724872589111, |
| "loss": 1.9544, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -40.276798248291016, |
| "rewards/margins": 8.140453338623047, |
| "rewards/rejected": -48.41725158691406, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.7305626236923947, |
| "grad_norm": 129.10288785236676, |
| "learning_rate": 1.6462989193437453e-07, |
| "logits/chosen": -1.161424994468689, |
| "logits/rejected": -1.1720256805419922, |
| "logps/chosen": -4.578709125518799, |
| "logps/rejected": -5.312513828277588, |
| "loss": 2.3471, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -45.787086486816406, |
| "rewards/margins": 7.338046550750732, |
| "rewards/rejected": -53.1251335144043, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.732824427480916, |
| "grad_norm": 145.12918025265157, |
| "learning_rate": 1.6207795334129365e-07, |
| "logits/chosen": -1.1244527101516724, |
| "logits/rejected": -1.1257734298706055, |
| "logps/chosen": -4.8168206214904785, |
| "logps/rejected": -5.5553297996521, |
| "loss": 2.6839, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -48.16820526123047, |
| "rewards/margins": 7.385091304779053, |
| "rewards/rejected": -55.55329895019531, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7350862312694374, |
| "grad_norm": 196.0171375912546, |
| "learning_rate": 1.5954091354349121e-07, |
| "logits/chosen": -1.1482946872711182, |
| "logits/rejected": -1.1611016988754272, |
| "logps/chosen": -4.442202568054199, |
| "logps/rejected": -4.989385604858398, |
| "loss": 3.3545, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -44.422027587890625, |
| "rewards/margins": 5.471826553344727, |
| "rewards/rejected": -49.89385223388672, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7373480350579588, |
| "grad_norm": 182.35479880389357, |
| "learning_rate": 1.5701893141164364e-07, |
| "logits/chosen": -1.1409872770309448, |
| "logits/rejected": -1.1580214500427246, |
| "logps/chosen": -4.6923394203186035, |
| "logps/rejected": -5.407514572143555, |
| "loss": 3.7705, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -46.923397064208984, |
| "rewards/margins": 7.151753902435303, |
| "rewards/rejected": -54.07514953613281, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.73960983884648, |
| "grad_norm": 166.07956012082315, |
| "learning_rate": 1.545121648735093e-07, |
| "logits/chosen": -1.1439409255981445, |
| "logits/rejected": -1.1424399614334106, |
| "logps/chosen": -4.474762916564941, |
| "logps/rejected": -5.130880355834961, |
| "loss": 3.0764, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -44.74763107299805, |
| "rewards/margins": 6.561171531677246, |
| "rewards/rejected": -51.308799743652344, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7418716426350014, |
| "grad_norm": 140.38393909032104, |
| "learning_rate": 1.5202077090403863e-07, |
| "logits/chosen": -1.158162236213684, |
| "logits/rejected": -1.1361192464828491, |
| "logps/chosen": -4.179104328155518, |
| "logps/rejected": -4.780279636383057, |
| "loss": 2.8063, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -41.791046142578125, |
| "rewards/margins": 6.011752128601074, |
| "rewards/rejected": -47.802799224853516, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7441334464235227, |
| "grad_norm": 149.78510692087144, |
| "learning_rate": 1.495449055155443e-07, |
| "logits/chosen": -1.148727536201477, |
| "logits/rejected": -1.16141676902771, |
| "logps/chosen": -4.255738258361816, |
| "logps/rejected": -5.054032325744629, |
| "loss": 2.3771, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -42.5573844909668, |
| "rewards/margins": 7.9829421043396, |
| "rewards/rejected": -50.54032516479492, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.7463952502120441, |
| "grad_norm": 144.87586841648493, |
| "learning_rate": 1.4708472374793112e-07, |
| "logits/chosen": -1.0922203063964844, |
| "logits/rejected": -1.1075836420059204, |
| "logps/chosen": -4.270491600036621, |
| "logps/rejected": -4.7941460609436035, |
| "loss": 3.2525, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -42.704917907714844, |
| "rewards/margins": 5.236541271209717, |
| "rewards/rejected": -47.941463470458984, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7486570540005655, |
| "grad_norm": 151.1541825467292, |
| "learning_rate": 1.4464037965898878e-07, |
| "logits/chosen": -1.087780475616455, |
| "logits/rejected": -1.094712495803833, |
| "logps/chosen": -4.091693878173828, |
| "logps/rejected": -4.723832607269287, |
| "loss": 2.5101, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -40.91693878173828, |
| "rewards/margins": 6.321380138397217, |
| "rewards/rejected": -47.238319396972656, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7509188577890868, |
| "grad_norm": 150.53746416178075, |
| "learning_rate": 1.4221202631474282e-07, |
| "logits/chosen": -1.074486494064331, |
| "logits/rejected": -1.0786263942718506, |
| "logps/chosen": -4.040347099304199, |
| "logps/rejected": -4.647793769836426, |
| "loss": 2.7484, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -40.403465270996094, |
| "rewards/margins": 6.074465751647949, |
| "rewards/rejected": -46.47793197631836, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7531806615776081, |
| "grad_norm": 140.80934854690494, |
| "learning_rate": 1.3979981577987113e-07, |
| "logits/chosen": -1.1275672912597656, |
| "logits/rejected": -1.1185460090637207, |
| "logps/chosen": -3.7870967388153076, |
| "logps/rejected": -4.483286380767822, |
| "loss": 2.4486, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -37.87096405029297, |
| "rewards/margins": 6.961895942687988, |
| "rewards/rejected": -44.832862854003906, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7554424653661295, |
| "grad_norm": 136.75785695589838, |
| "learning_rate": 1.374038991081807e-07, |
| "logits/chosen": -1.1587432622909546, |
| "logits/rejected": -1.1672437191009521, |
| "logps/chosen": -4.0366315841674805, |
| "logps/rejected": -4.591939926147461, |
| "loss": 2.7988, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -40.36631774902344, |
| "rewards/margins": 5.55308723449707, |
| "rewards/rejected": -45.91940689086914, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7577042691546508, |
| "grad_norm": 133.61424676453677, |
| "learning_rate": 1.3502442633314882e-07, |
| "logits/chosen": -1.1029198169708252, |
| "logits/rejected": -1.1053695678710938, |
| "logps/chosen": -3.5087780952453613, |
| "logps/rejected": -4.049241065979004, |
| "loss": 2.7249, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -35.0877799987793, |
| "rewards/margins": 5.404629707336426, |
| "rewards/rejected": -40.49240493774414, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7599660729431722, |
| "grad_norm": 130.2580688330827, |
| "learning_rate": 1.3266154645852815e-07, |
| "logits/chosen": -1.1168211698532104, |
| "logits/rejected": -1.1052803993225098, |
| "logps/chosen": -3.832916259765625, |
| "logps/rejected": -4.46284294128418, |
| "loss": 2.3583, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -38.329158782958984, |
| "rewards/margins": 6.2992682456970215, |
| "rewards/rejected": -44.62842559814453, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7622278767316936, |
| "grad_norm": 116.1303850461595, |
| "learning_rate": 1.303154074490152e-07, |
| "logits/chosen": -1.146256685256958, |
| "logits/rejected": -1.1414530277252197, |
| "logps/chosen": -3.628737688064575, |
| "logps/rejected": -4.385705947875977, |
| "loss": 2.1664, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -36.287376403808594, |
| "rewards/margins": 7.569684028625488, |
| "rewards/rejected": -43.8570556640625, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7644896805202148, |
| "grad_norm": 129.71570053294832, |
| "learning_rate": 1.2798615622098616e-07, |
| "logits/chosen": -1.1601388454437256, |
| "logits/rejected": -1.1707144975662231, |
| "logps/chosen": -3.6512036323547363, |
| "logps/rejected": -4.333779335021973, |
| "loss": 3.0042, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -36.51203536987305, |
| "rewards/margins": 6.825753688812256, |
| "rewards/rejected": -43.337791442871094, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7667514843087362, |
| "grad_norm": 145.5066563342706, |
| "learning_rate": 1.2567393863329523e-07, |
| "logits/chosen": -1.1308033466339111, |
| "logits/rejected": -1.1647956371307373, |
| "logps/chosen": -3.8886606693267822, |
| "logps/rejected": -4.585987567901611, |
| "loss": 2.6023, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -38.88660430908203, |
| "rewards/margins": 6.973270416259766, |
| "rewards/rejected": -45.8598747253418, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7690132880972576, |
| "grad_norm": 131.06463836482803, |
| "learning_rate": 1.233788994781423e-07, |
| "logits/chosen": -1.1664081811904907, |
| "logits/rejected": -1.1904940605163574, |
| "logps/chosen": -3.713836193084717, |
| "logps/rejected": -4.329701900482178, |
| "loss": 2.6029, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -37.138362884521484, |
| "rewards/margins": 6.158655166625977, |
| "rewards/rejected": -43.29701614379883, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7712750918857789, |
| "grad_norm": 119.23231418301683, |
| "learning_rate": 1.2110118247200468e-07, |
| "logits/chosen": -1.1798597574234009, |
| "logits/rejected": -1.1897577047348022, |
| "logps/chosen": -3.7017178535461426, |
| "logps/rejected": -4.307440280914307, |
| "loss": 2.2494, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -37.017181396484375, |
| "rewards/margins": 6.057225704193115, |
| "rewards/rejected": -43.07440185546875, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7735368956743003, |
| "grad_norm": 140.0321300554404, |
| "learning_rate": 1.1884093024663933e-07, |
| "logits/chosen": -1.1550508737564087, |
| "logits/rejected": -1.1637250185012817, |
| "logps/chosen": -3.4708728790283203, |
| "logps/rejected": -4.254858493804932, |
| "loss": 2.5564, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -34.70873260498047, |
| "rewards/margins": 7.839854717254639, |
| "rewards/rejected": -42.548583984375, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7757986994628217, |
| "grad_norm": 135.8564211412602, |
| "learning_rate": 1.1659828434014886e-07, |
| "logits/chosen": -1.1468806266784668, |
| "logits/rejected": -1.130220890045166, |
| "logps/chosen": -3.576955795288086, |
| "logps/rejected": -4.336724758148193, |
| "loss": 2.3826, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -35.769561767578125, |
| "rewards/margins": 7.597687244415283, |
| "rewards/rejected": -43.36724853515625, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.7780605032513429, |
| "grad_norm": 161.11121894922448, |
| "learning_rate": 1.143733851881203e-07, |
| "logits/chosen": -1.2072776556015015, |
| "logits/rejected": -1.2078864574432373, |
| "logps/chosen": -3.899902820587158, |
| "logps/rejected": -4.685684680938721, |
| "loss": 2.4913, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -38.999027252197266, |
| "rewards/margins": 7.857818603515625, |
| "rewards/rejected": -46.856842041015625, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7803223070398643, |
| "grad_norm": 125.43996792686391, |
| "learning_rate": 1.1216637211483005e-07, |
| "logits/chosen": -1.148529291152954, |
| "logits/rejected": -1.1545379161834717, |
| "logps/chosen": -3.8425681591033936, |
| "logps/rejected": -4.446829795837402, |
| "loss": 2.4109, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -38.425682067871094, |
| "rewards/margins": 6.0426130294799805, |
| "rewards/rejected": -44.468292236328125, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7825841108283856, |
| "grad_norm": 134.62772319760776, |
| "learning_rate": 1.0997738332451936e-07, |
| "logits/chosen": -1.1377711296081543, |
| "logits/rejected": -1.1397600173950195, |
| "logps/chosen": -3.997607469558716, |
| "logps/rejected": -4.604636192321777, |
| "loss": 2.5131, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -39.976070404052734, |
| "rewards/margins": 6.070294380187988, |
| "rewards/rejected": -46.04636764526367, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.784845914616907, |
| "grad_norm": 134.84346461104718, |
| "learning_rate": 1.0780655589274031e-07, |
| "logits/chosen": -1.2021856307983398, |
| "logits/rejected": -1.1845647096633911, |
| "logps/chosen": -3.890503406524658, |
| "logps/rejected": -4.5709123611450195, |
| "loss": 2.1196, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -38.905029296875, |
| "rewards/margins": 6.8040876388549805, |
| "rewards/rejected": -45.70912170410156, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7871077184054284, |
| "grad_norm": 139.41519511521523, |
| "learning_rate": 1.056540257577712e-07, |
| "logits/chosen": -1.1279267072677612, |
| "logits/rejected": -1.1376078128814697, |
| "logps/chosen": -4.36607551574707, |
| "logps/rejected": -5.16333532333374, |
| "loss": 2.0742, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -43.66075897216797, |
| "rewards/margins": 7.972592830657959, |
| "rewards/rejected": -51.63335037231445, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7893695221939496, |
| "grad_norm": 132.61876254999865, |
| "learning_rate": 1.0351992771210554e-07, |
| "logits/chosen": -1.1291594505310059, |
| "logits/rejected": -1.152091383934021, |
| "logps/chosen": -3.9938082695007324, |
| "logps/rejected": -4.693463325500488, |
| "loss": 2.5644, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -39.93808364868164, |
| "rewards/margins": 6.996548175811768, |
| "rewards/rejected": -46.93463134765625, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.791631325982471, |
| "grad_norm": 153.1527234952499, |
| "learning_rate": 1.0140439539400953e-07, |
| "logits/chosen": -1.1234492063522339, |
| "logits/rejected": -1.135466456413269, |
| "logps/chosen": -3.981572389602661, |
| "logps/rejected": -4.654304027557373, |
| "loss": 2.887, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -39.81572341918945, |
| "rewards/margins": 6.727319240570068, |
| "rewards/rejected": -46.54304504394531, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7938931297709924, |
| "grad_norm": 118.9564494853148, |
| "learning_rate": 9.930756127915488e-08, |
| "logits/chosen": -1.1307318210601807, |
| "logits/rejected": -1.1451183557510376, |
| "logps/chosen": -3.9668586254119873, |
| "logps/rejected": -4.713019847869873, |
| "loss": 2.0547, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -39.66858673095703, |
| "rewards/margins": 7.461606979370117, |
| "rewards/rejected": -47.13019561767578, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.7961549335595137, |
| "grad_norm": 179.1168561225876, |
| "learning_rate": 9.722955667232242e-08, |
| "logits/chosen": -1.175784945487976, |
| "logits/rejected": -1.1912992000579834, |
| "logps/chosen": -4.297727584838867, |
| "logps/rejected": -4.875129699707031, |
| "loss": 3.1093, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -42.977272033691406, |
| "rewards/margins": 5.774031162261963, |
| "rewards/rejected": -48.751304626464844, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7984167373480351, |
| "grad_norm": 149.9462238898871, |
| "learning_rate": 9.517051169918016e-08, |
| "logits/chosen": -1.1609903573989868, |
| "logits/rejected": -1.1678093671798706, |
| "logps/chosen": -4.031696796417236, |
| "logps/rejected": -4.656764984130859, |
| "loss": 2.9365, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -40.31697082519531, |
| "rewards/margins": 6.25068473815918, |
| "rewards/rejected": -46.56765365600586, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.8006785411365565, |
| "grad_norm": 144.2370757323942, |
| "learning_rate": 9.313055529813412e-08, |
| "logits/chosen": -1.0940794944763184, |
| "logits/rejected": -1.1267677545547485, |
| "logps/chosen": -4.094084739685059, |
| "logps/rejected": -4.81442403793335, |
| "loss": 2.1865, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -40.94084548950195, |
| "rewards/margins": 7.203390121459961, |
| "rewards/rejected": -48.14424133300781, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.8029403449250777, |
| "grad_norm": 150.07773110636595, |
| "learning_rate": 9.110981521225532e-08, |
| "logits/chosen": -1.1485052108764648, |
| "logits/rejected": -1.164825439453125, |
| "logps/chosen": -4.242175579071045, |
| "logps/rejected": -4.949873924255371, |
| "loss": 2.6261, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -42.421756744384766, |
| "rewards/margins": 7.07698392868042, |
| "rewards/rejected": -49.498741149902344, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.8052021487135991, |
| "grad_norm": 158.4977836908745, |
| "learning_rate": 8.910841798127884e-08, |
| "logits/chosen": -1.1083470582962036, |
| "logits/rejected": -1.1305320262908936, |
| "logps/chosen": -4.242856025695801, |
| "logps/rejected": -4.95394229888916, |
| "loss": 2.2088, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -42.428558349609375, |
| "rewards/margins": 7.110870361328125, |
| "rewards/rejected": -49.53942108154297, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.8074639525021204, |
| "grad_norm": 142.3023711469036, |
| "learning_rate": 8.712648893368139e-08, |
| "logits/chosen": -1.1344428062438965, |
| "logits/rejected": -1.1723387241363525, |
| "logps/chosen": -4.20165491104126, |
| "logps/rejected": -5.037359237670898, |
| "loss": 2.3976, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -42.01654815673828, |
| "rewards/margins": 8.357048034667969, |
| "rewards/rejected": -50.37359619140625, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.8097257562906418, |
| "grad_norm": 133.95168826861777, |
| "learning_rate": 8.516415217883186e-08, |
| "logits/chosen": -1.1243913173675537, |
| "logits/rejected": -1.1292424201965332, |
| "logps/chosen": -4.095825672149658, |
| "logps/rejected": -4.912569999694824, |
| "loss": 2.2118, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -40.958255767822266, |
| "rewards/margins": 8.167447090148926, |
| "rewards/rejected": -49.125701904296875, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.8119875600791632, |
| "grad_norm": 185.9215975376776, |
| "learning_rate": 8.32215305992209e-08, |
| "logits/chosen": -1.1615304946899414, |
| "logits/rejected": -1.1655793190002441, |
| "logps/chosen": -4.049884796142578, |
| "logps/rejected": -4.723489761352539, |
| "loss": 2.7838, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -40.49885177612305, |
| "rewards/margins": 6.736046314239502, |
| "rewards/rejected": -47.23489761352539, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.8142493638676844, |
| "grad_norm": 125.05686534430966, |
| "learning_rate": 8.129874584276448e-08, |
| "logits/chosen": -1.1320921182632446, |
| "logits/rejected": -1.1268113851547241, |
| "logps/chosen": -4.095798969268799, |
| "logps/rejected": -4.970909118652344, |
| "loss": 1.8914, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -40.95799255371094, |
| "rewards/margins": 8.751094818115234, |
| "rewards/rejected": -49.70909118652344, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8165111676562058, |
| "grad_norm": 162.2302808539331, |
| "learning_rate": 7.939591831518746e-08, |
| "logits/chosen": -1.1332316398620605, |
| "logits/rejected": -1.1608184576034546, |
| "logps/chosen": -4.223212242126465, |
| "logps/rejected": -4.8841400146484375, |
| "loss": 2.1666, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -42.23212432861328, |
| "rewards/margins": 6.609279155731201, |
| "rewards/rejected": -48.84140396118164, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.8187729714447272, |
| "grad_norm": 152.02329076128746, |
| "learning_rate": 7.751316717248304e-08, |
| "logits/chosen": -1.1371338367462158, |
| "logits/rejected": -1.144465684890747, |
| "logps/chosen": -4.625061988830566, |
| "logps/rejected": -5.566699028015137, |
| "loss": 2.3702, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -46.25061798095703, |
| "rewards/margins": 9.416373252868652, |
| "rewards/rejected": -55.666996002197266, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.8210347752332485, |
| "grad_norm": 182.21151399037953, |
| "learning_rate": 7.565061031345142e-08, |
| "logits/chosen": -1.118225336074829, |
| "logits/rejected": -1.1225550174713135, |
| "logps/chosen": -4.930262088775635, |
| "logps/rejected": -5.776245594024658, |
| "loss": 2.066, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -49.30262756347656, |
| "rewards/margins": 8.45982837677002, |
| "rewards/rejected": -57.76245880126953, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.8232965790217699, |
| "grad_norm": 182.6967377285154, |
| "learning_rate": 7.380836437231686e-08, |
| "logits/chosen": -1.1148128509521484, |
| "logits/rejected": -1.118404746055603, |
| "logps/chosen": -4.367569446563721, |
| "logps/rejected": -5.1442484855651855, |
| "loss": 2.4535, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -43.675697326660156, |
| "rewards/margins": 7.766792297363281, |
| "rewards/rejected": -51.44248962402344, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.8255583828102913, |
| "grad_norm": 146.88066372474253, |
| "learning_rate": 7.198654471142371e-08, |
| "logits/chosen": -1.1199636459350586, |
| "logits/rejected": -1.1298437118530273, |
| "logps/chosen": -4.415999412536621, |
| "logps/rejected": -5.402173042297363, |
| "loss": 1.7824, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -44.15999984741211, |
| "rewards/margins": 9.861732482910156, |
| "rewards/rejected": -54.021728515625, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.8278201865988125, |
| "grad_norm": 162.16963854973068, |
| "learning_rate": 7.01852654140132e-08, |
| "logits/chosen": -1.1699155569076538, |
| "logits/rejected": -1.1726248264312744, |
| "logps/chosen": -4.93940544128418, |
| "logps/rejected": -5.82332706451416, |
| "loss": 2.2319, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -49.39405059814453, |
| "rewards/margins": 8.839221954345703, |
| "rewards/rejected": -58.2332763671875, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.8300819903873339, |
| "grad_norm": 162.38403071496873, |
| "learning_rate": 6.840463927707833e-08, |
| "logits/chosen": -1.1146910190582275, |
| "logits/rejected": -1.1289540529251099, |
| "logps/chosen": -4.881972312927246, |
| "logps/rejected": -5.560351848602295, |
| "loss": 2.7204, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -48.81972885131836, |
| "rewards/margins": 6.783794403076172, |
| "rewards/rejected": -55.603515625, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.8323437941758552, |
| "grad_norm": 155.33335228556993, |
| "learning_rate": 6.664477780430138e-08, |
| "logits/chosen": -1.108270287513733, |
| "logits/rejected": -1.1260305643081665, |
| "logps/chosen": -4.770946979522705, |
| "logps/rejected": -5.368707656860352, |
| "loss": 2.888, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -47.70947265625, |
| "rewards/margins": 5.977604866027832, |
| "rewards/rejected": -53.68707275390625, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.8346055979643766, |
| "grad_norm": 172.0274452536749, |
| "learning_rate": 6.49057911990711e-08, |
| "logits/chosen": -1.0896248817443848, |
| "logits/rejected": -1.0903116464614868, |
| "logps/chosen": -4.668241500854492, |
| "logps/rejected": -5.342168807983398, |
| "loss": 2.8429, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -46.68241500854492, |
| "rewards/margins": 6.739270210266113, |
| "rewards/rejected": -53.421688079833984, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.836867401752898, |
| "grad_norm": 147.87851842328394, |
| "learning_rate": 6.318778835758189e-08, |
| "logits/chosen": -1.1355427503585815, |
| "logits/rejected": -1.1373378038406372, |
| "logps/chosen": -4.819726943969727, |
| "logps/rejected": -5.56911563873291, |
| "loss": 1.9771, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -48.19727325439453, |
| "rewards/margins": 7.493888854980469, |
| "rewards/rejected": -55.69115447998047, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8391292055414192, |
| "grad_norm": 193.1168590090841, |
| "learning_rate": 6.149087686201433e-08, |
| "logits/chosen": -1.1564326286315918, |
| "logits/rejected": -1.163049578666687, |
| "logps/chosen": -4.552361011505127, |
| "logps/rejected": -5.267241954803467, |
| "loss": 3.2197, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -45.52361297607422, |
| "rewards/margins": 7.148810863494873, |
| "rewards/rejected": -52.67242431640625, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.8413910093299406, |
| "grad_norm": 158.69146700001298, |
| "learning_rate": 5.98151629737988e-08, |
| "logits/chosen": -1.1418228149414062, |
| "logits/rejected": -1.139822006225586, |
| "logps/chosen": -4.647780418395996, |
| "logps/rejected": -5.4461588859558105, |
| "loss": 2.7971, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -46.47779846191406, |
| "rewards/margins": 7.983788967132568, |
| "rewards/rejected": -54.46159362792969, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.843652813118462, |
| "grad_norm": 127.53521312375436, |
| "learning_rate": 5.816075162696097e-08, |
| "logits/chosen": -1.1655972003936768, |
| "logits/rejected": -1.1918379068374634, |
| "logps/chosen": -4.663849830627441, |
| "logps/rejected": -5.401423931121826, |
| "loss": 1.9571, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -46.63849639892578, |
| "rewards/margins": 7.375744819641113, |
| "rewards/rejected": -54.01424026489258, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.8459146169069833, |
| "grad_norm": 142.81966600033684, |
| "learning_rate": 5.6527746421551046e-08, |
| "logits/chosen": -1.106029987335205, |
| "logits/rejected": -1.091361403465271, |
| "logps/chosen": -4.538349628448486, |
| "logps/rejected": -5.323310375213623, |
| "loss": 2.36, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -45.38349533081055, |
| "rewards/margins": 7.849606513977051, |
| "rewards/rejected": -53.23310470581055, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8481764206955047, |
| "grad_norm": 157.71016320454683, |
| "learning_rate": 5.4916249617156064e-08, |
| "logits/chosen": -1.1139298677444458, |
| "logits/rejected": -1.1261509656906128, |
| "logps/chosen": -4.135310649871826, |
| "logps/rejected": -4.878140926361084, |
| "loss": 2.345, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -41.353111267089844, |
| "rewards/margins": 7.42829704284668, |
| "rewards/rejected": -48.781402587890625, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8504382244840261, |
| "grad_norm": 132.44635331603325, |
| "learning_rate": 5.332636212649646e-08, |
| "logits/chosen": -1.1287761926651, |
| "logits/rejected": -1.1302844285964966, |
| "logps/chosen": -4.467879295349121, |
| "logps/rejected": -5.283463954925537, |
| "loss": 2.1031, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -44.678794860839844, |
| "rewards/margins": 8.155845642089844, |
| "rewards/rejected": -52.83464050292969, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8527000282725473, |
| "grad_norm": 150.5335514700392, |
| "learning_rate": 5.17581835091069e-08, |
| "logits/chosen": -1.1303383111953735, |
| "logits/rejected": -1.1533814668655396, |
| "logps/chosen": -4.488173961639404, |
| "logps/rejected": -5.281373977661133, |
| "loss": 2.7408, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -44.881736755371094, |
| "rewards/margins": 7.932003498077393, |
| "rewards/rejected": -52.813743591308594, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.8549618320610687, |
| "grad_norm": 150.25699456017983, |
| "learning_rate": 5.02118119651016e-08, |
| "logits/chosen": -1.1570930480957031, |
| "logits/rejected": -1.159618616104126, |
| "logps/chosen": -4.484018802642822, |
| "logps/rejected": -5.202611923217773, |
| "loss": 2.5301, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -44.840187072753906, |
| "rewards/margins": 7.1859331130981445, |
| "rewards/rejected": -52.026119232177734, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.85722363584959, |
| "grad_norm": 162.1310863378, |
| "learning_rate": 4.868734432902526e-08, |
| "logits/chosen": -1.2017693519592285, |
| "logits/rejected": -1.202324390411377, |
| "logps/chosen": -4.508758068084717, |
| "logps/rejected": -5.381972312927246, |
| "loss": 2.9957, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -45.087581634521484, |
| "rewards/margins": 8.732136726379395, |
| "rewards/rejected": -53.8197135925293, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8594854396381114, |
| "grad_norm": 171.7256004142805, |
| "learning_rate": 4.7184876063789134e-08, |
| "logits/chosen": -1.14678955078125, |
| "logits/rejected": -1.1611804962158203, |
| "logps/chosen": -3.9182119369506836, |
| "logps/rejected": -4.665888786315918, |
| "loss": 2.5436, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -39.1821174621582, |
| "rewards/margins": 7.4767746925354, |
| "rewards/rejected": -46.65888977050781, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8617472434266328, |
| "grad_norm": 126.74508418371084, |
| "learning_rate": 4.570450125469314e-08, |
| "logits/chosen": -1.1241579055786133, |
| "logits/rejected": -1.1344544887542725, |
| "logps/chosen": -4.487060546875, |
| "logps/rejected": -5.410554885864258, |
| "loss": 2.0142, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -44.870601654052734, |
| "rewards/margins": 9.234944343566895, |
| "rewards/rejected": -54.10554504394531, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.864009047215154, |
| "grad_norm": 150.5877064450684, |
| "learning_rate": 4.424631260353378e-08, |
| "logits/chosen": -1.1614850759506226, |
| "logits/rejected": -1.1724066734313965, |
| "logps/chosen": -4.289636611938477, |
| "logps/rejected": -4.987452983856201, |
| "loss": 2.6354, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -42.8963623046875, |
| "rewards/margins": 6.97816801071167, |
| "rewards/rejected": -49.874534606933594, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8662708510036754, |
| "grad_norm": 135.3394329513268, |
| "learning_rate": 4.281040142280008e-08, |
| "logits/chosen": -1.1927827596664429, |
| "logits/rejected": -1.2009069919586182, |
| "logps/chosen": -4.040445804595947, |
| "logps/rejected": -4.89124870300293, |
| "loss": 1.918, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -40.40446090698242, |
| "rewards/margins": 8.508023262023926, |
| "rewards/rejected": -48.91248321533203, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.8685326547921968, |
| "grad_norm": 197.32088580990666, |
| "learning_rate": 4.1396857629954286e-08, |
| "logits/chosen": -1.152172565460205, |
| "logits/rejected": -1.1629152297973633, |
| "logps/chosen": -4.775947093963623, |
| "logps/rejected": -5.507784366607666, |
| "loss": 2.7031, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -47.75947189331055, |
| "rewards/margins": 7.318375110626221, |
| "rewards/rejected": -55.07784652709961, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8707944585807181, |
| "grad_norm": 147.48372966461358, |
| "learning_rate": 4.000576974180232e-08, |
| "logits/chosen": -1.1277602910995483, |
| "logits/rejected": -1.1425299644470215, |
| "logps/chosen": -4.28041410446167, |
| "logps/rejected": -4.912994861602783, |
| "loss": 3.3009, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -42.80414581298828, |
| "rewards/margins": 6.325807094573975, |
| "rewards/rejected": -49.12995147705078, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8730562623692395, |
| "grad_norm": 216.40246174451113, |
| "learning_rate": 3.8637224868950066e-08, |
| "logits/chosen": -1.1443856954574585, |
| "logits/rejected": -1.158379077911377, |
| "logps/chosen": -4.138412952423096, |
| "logps/rejected": -4.75631046295166, |
| "loss": 2.8595, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -41.38412857055664, |
| "rewards/margins": 6.178977012634277, |
| "rewards/rejected": -47.56310272216797, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8753180661577609, |
| "grad_norm": 145.70726367236028, |
| "learning_rate": 3.729130871034885e-08, |
| "logits/chosen": -1.1509549617767334, |
| "logits/rejected": -1.1646292209625244, |
| "logps/chosen": -4.321000099182129, |
| "logps/rejected": -5.116883277893066, |
| "loss": 2.1179, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -43.21000671386719, |
| "rewards/margins": 7.958827972412109, |
| "rewards/rejected": -51.1688346862793, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8775798699462821, |
| "grad_norm": 163.83720976168166, |
| "learning_rate": 3.596810554792888e-08, |
| "logits/chosen": -1.146296501159668, |
| "logits/rejected": -1.171584963798523, |
| "logps/chosen": -4.1524882316589355, |
| "logps/rejected": -4.907288074493408, |
| "loss": 2.833, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -41.52488327026367, |
| "rewards/margins": 7.547997951507568, |
| "rewards/rejected": -49.07288360595703, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8798416737348035, |
| "grad_norm": 171.451051766312, |
| "learning_rate": 3.466769824132116e-08, |
| "logits/chosen": -1.1617058515548706, |
| "logits/rejected": -1.151517629623413, |
| "logps/chosen": -4.187567234039307, |
| "logps/rejected": -4.928333282470703, |
| "loss": 2.0192, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -41.87567138671875, |
| "rewards/margins": 7.407662391662598, |
| "rewards/rejected": -49.28333282470703, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8821034775233249, |
| "grad_norm": 149.50679016885917, |
| "learning_rate": 3.339016822266925e-08, |
| "logits/chosen": -1.1291420459747314, |
| "logits/rejected": -1.154278039932251, |
| "logps/chosen": -4.321372032165527, |
| "logps/rejected": -5.223550319671631, |
| "loss": 1.7499, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -43.213722229003906, |
| "rewards/margins": 9.0217866897583, |
| "rewards/rejected": -52.235511779785156, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8843652813118462, |
| "grad_norm": 138.97884749980054, |
| "learning_rate": 3.213559549152958e-08, |
| "logits/chosen": -1.1683623790740967, |
| "logits/rejected": -1.1750186681747437, |
| "logps/chosen": -4.231035232543945, |
| "logps/rejected": -5.0254597663879395, |
| "loss": 2.4769, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -42.31035614013672, |
| "rewards/margins": 7.944244861602783, |
| "rewards/rejected": -50.25459671020508, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8866270851003676, |
| "grad_norm": 149.13743982029294, |
| "learning_rate": 3.090405860986203e-08, |
| "logits/chosen": -1.1904428005218506, |
| "logits/rejected": -1.225684404373169, |
| "logps/chosen": -4.472517967224121, |
| "logps/rejected": -5.399662494659424, |
| "loss": 2.1715, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -44.72518539428711, |
| "rewards/margins": 9.271440505981445, |
| "rewards/rejected": -53.99662399291992, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 137.86076074457236, |
| "learning_rate": 2.9695634697110315e-08, |
| "logits/chosen": -1.1298010349273682, |
| "logits/rejected": -1.1351279020309448, |
| "logps/chosen": -4.120969772338867, |
| "logps/rejected": -4.982769966125488, |
| "loss": 2.6108, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -41.209693908691406, |
| "rewards/margins": 8.618008613586426, |
| "rewards/rejected": -49.82770538330078, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8911506926774102, |
| "grad_norm": 205.78409246729305, |
| "learning_rate": 2.8510399425372766e-08, |
| "logits/chosen": -1.169985294342041, |
| "logits/rejected": -1.1676735877990723, |
| "logps/chosen": -4.293628692626953, |
| "logps/rejected": -4.940824508666992, |
| "loss": 2.4817, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -42.93628692626953, |
| "rewards/margins": 6.471960067749023, |
| "rewards/rejected": -49.40824890136719, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.8934124964659316, |
| "grad_norm": 156.66716455473116, |
| "learning_rate": 2.734842701466329e-08, |
| "logits/chosen": -1.1552950143814087, |
| "logits/rejected": -1.1497843265533447, |
| "logps/chosen": -4.565382957458496, |
| "logps/rejected": -5.28816032409668, |
| "loss": 2.3395, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -45.653831481933594, |
| "rewards/margins": 7.227770805358887, |
| "rewards/rejected": -52.88159942626953, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8956743002544529, |
| "grad_norm": 123.71662611686183, |
| "learning_rate": 2.6209790228264438e-08, |
| "logits/chosen": -1.159185528755188, |
| "logits/rejected": -1.1655610799789429, |
| "logps/chosen": -3.889777183532715, |
| "logps/rejected": -4.674656867980957, |
| "loss": 2.1273, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -38.89777755737305, |
| "rewards/margins": 7.848790168762207, |
| "rewards/rejected": -46.7465705871582, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.8979361040429743, |
| "grad_norm": 147.95551170482682, |
| "learning_rate": 2.5094560368170305e-08, |
| "logits/chosen": -1.1451451778411865, |
| "logits/rejected": -1.1695401668548584, |
| "logps/chosen": -4.563682556152344, |
| "logps/rejected": -5.299858570098877, |
| "loss": 2.1221, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -45.63682556152344, |
| "rewards/margins": 7.361759185791016, |
| "rewards/rejected": -52.99858093261719, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.9001979078314957, |
| "grad_norm": 154.64080727041423, |
| "learning_rate": 2.4002807270621893e-08, |
| "logits/chosen": -1.1961959600448608, |
| "logits/rejected": -1.1858330965042114, |
| "logps/chosen": -4.226797103881836, |
| "logps/rejected": -4.973693370819092, |
| "loss": 2.3421, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -42.26797103881836, |
| "rewards/margins": 7.468969821929932, |
| "rewards/rejected": -49.7369384765625, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.9024597116200169, |
| "grad_norm": 152.1828038337204, |
| "learning_rate": 2.293459930173354e-08, |
| "logits/chosen": -1.195888876914978, |
| "logits/rejected": -1.2131280899047852, |
| "logps/chosen": -4.33859395980835, |
| "logps/rejected": -5.055052757263184, |
| "loss": 2.6306, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -43.38593673706055, |
| "rewards/margins": 7.164592742919922, |
| "rewards/rejected": -50.5505256652832, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.9047215154085383, |
| "grad_norm": 143.11836038738673, |
| "learning_rate": 2.189000335321256e-08, |
| "logits/chosen": -1.1481764316558838, |
| "logits/rejected": -1.150956153869629, |
| "logps/chosen": -4.231009483337402, |
| "logps/rejected": -4.886160373687744, |
| "loss": 2.9376, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -42.310096740722656, |
| "rewards/margins": 6.551509380340576, |
| "rewards/rejected": -48.861602783203125, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9069833191970597, |
| "grad_norm": 149.95401926590577, |
| "learning_rate": 2.086908483816954e-08, |
| "logits/chosen": -1.1622329950332642, |
| "logits/rejected": -1.1632294654846191, |
| "logps/chosen": -4.43674373626709, |
| "logps/rejected": -5.167576313018799, |
| "loss": 2.2468, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -44.367435455322266, |
| "rewards/margins": 7.308327674865723, |
| "rewards/rejected": -51.67576217651367, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.909245122985581, |
| "grad_norm": 141.35952916507554, |
| "learning_rate": 1.9871907687022717e-08, |
| "logits/chosen": -1.150231957435608, |
| "logits/rejected": -1.1642488241195679, |
| "logps/chosen": -4.147550106048584, |
| "logps/rejected": -4.799522399902344, |
| "loss": 2.3613, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -41.475502014160156, |
| "rewards/margins": 6.519725799560547, |
| "rewards/rejected": -47.99523162841797, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.9115069267741024, |
| "grad_norm": 130.65953710722863, |
| "learning_rate": 1.889853434349451e-08, |
| "logits/chosen": -1.1447476148605347, |
| "logits/rejected": -1.1627604961395264, |
| "logps/chosen": -4.104800224304199, |
| "logps/rejected": -4.9086785316467285, |
| "loss": 2.5277, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -41.04800033569336, |
| "rewards/margins": 8.03878402709961, |
| "rewards/rejected": -49.08678436279297, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.9137687305626236, |
| "grad_norm": 144.0791755156075, |
| "learning_rate": 1.7949025760701164e-08, |
| "logits/chosen": -1.1415810585021973, |
| "logits/rejected": -1.1473815441131592, |
| "logps/chosen": -4.432383060455322, |
| "logps/rejected": -5.151994705200195, |
| "loss": 2.2592, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -44.323829650878906, |
| "rewards/margins": 7.1961164474487305, |
| "rewards/rejected": -51.51994705200195, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.916030534351145, |
| "grad_norm": 140.1370708813358, |
| "learning_rate": 1.7023441397336023e-08, |
| "logits/chosen": -1.1852596998214722, |
| "logits/rejected": -1.2118213176727295, |
| "logps/chosen": -4.218780994415283, |
| "logps/rejected": -5.021721363067627, |
| "loss": 2.0941, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -42.18781661987305, |
| "rewards/margins": 8.029399871826172, |
| "rewards/rejected": -50.21721267700195, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.9182923381396664, |
| "grad_norm": 154.58794315173094, |
| "learning_rate": 1.6121839213945854e-08, |
| "logits/chosen": -1.1428247690200806, |
| "logits/rejected": -1.1743805408477783, |
| "logps/chosen": -4.260222911834717, |
| "logps/rejected": -4.994080543518066, |
| "loss": 3.0364, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -42.602230072021484, |
| "rewards/margins": 7.3385748863220215, |
| "rewards/rejected": -49.9408073425293, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.9205541419281877, |
| "grad_norm": 174.30248594338545, |
| "learning_rate": 1.5244275669301777e-08, |
| "logits/chosen": -1.1678388118743896, |
| "logits/rejected": -1.1716469526290894, |
| "logps/chosen": -4.424648284912109, |
| "logps/rejected": -5.160457611083984, |
| "loss": 2.9157, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -44.246482849121094, |
| "rewards/margins": 7.358092784881592, |
| "rewards/rejected": -51.604576110839844, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.9228159457167091, |
| "grad_norm": 161.69664780097952, |
| "learning_rate": 1.4390805716863398e-08, |
| "logits/chosen": -1.1633141040802002, |
| "logits/rejected": -1.1669323444366455, |
| "logps/chosen": -4.22412109375, |
| "logps/rejected": -4.885556697845459, |
| "loss": 2.8954, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -42.241207122802734, |
| "rewards/margins": 6.614358901977539, |
| "rewards/rejected": -48.855567932128906, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.9250777495052305, |
| "grad_norm": 149.67343386390974, |
| "learning_rate": 1.3561482801337908e-08, |
| "logits/chosen": -1.1175371408462524, |
| "logits/rejected": -1.1360092163085938, |
| "logps/chosen": -4.270062446594238, |
| "logps/rejected": -5.0212225914001465, |
| "loss": 2.8054, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -42.700626373291016, |
| "rewards/margins": 7.511605262756348, |
| "rewards/rejected": -50.21223068237305, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.9273395532937517, |
| "grad_norm": 166.06441280121587, |
| "learning_rate": 1.2756358855332904e-08, |
| "logits/chosen": -1.1701834201812744, |
| "logits/rejected": -1.1796129941940308, |
| "logps/chosen": -4.174693584442139, |
| "logps/rejected": -4.778913497924805, |
| "loss": 3.0222, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -41.74692916870117, |
| "rewards/margins": 6.042202472686768, |
| "rewards/rejected": -47.78913497924805, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9296013570822731, |
| "grad_norm": 156.45557835212637, |
| "learning_rate": 1.1975484296105154e-08, |
| "logits/chosen": -1.1475261449813843, |
| "logits/rejected": -1.1557633876800537, |
| "logps/chosen": -4.335869789123535, |
| "logps/rejected": -5.053281784057617, |
| "loss": 2.8032, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -43.35869598388672, |
| "rewards/margins": 7.1741180419921875, |
| "rewards/rejected": -50.53281021118164, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.9318631608707945, |
| "grad_norm": 150.89002422183648, |
| "learning_rate": 1.1218908022402374e-08, |
| "logits/chosen": -1.1364606618881226, |
| "logits/rejected": -1.150048851966858, |
| "logps/chosen": -4.117517471313477, |
| "logps/rejected": -4.878651142120361, |
| "loss": 2.6882, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -41.17517852783203, |
| "rewards/margins": 7.611327648162842, |
| "rewards/rejected": -48.78650665283203, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.9341249646593158, |
| "grad_norm": 138.25948330744123, |
| "learning_rate": 1.0486677411402079e-08, |
| "logits/chosen": -1.2186678647994995, |
| "logits/rejected": -1.2154583930969238, |
| "logps/chosen": -4.54112434387207, |
| "logps/rejected": -5.477083206176758, |
| "loss": 2.4561, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -45.41124725341797, |
| "rewards/margins": 9.359580039978027, |
| "rewards/rejected": -54.77082824707031, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.9363867684478372, |
| "grad_norm": 141.68327115023573, |
| "learning_rate": 9.778838315744353e-09, |
| "logits/chosen": -1.1879788637161255, |
| "logits/rejected": -1.2011134624481201, |
| "logps/chosen": -4.5947699546813965, |
| "logps/rejected": -5.385745525360107, |
| "loss": 2.2977, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -45.94770050048828, |
| "rewards/margins": 7.909754753112793, |
| "rewards/rejected": -53.85745620727539, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.9386485722363584, |
| "grad_norm": 155.86855773359468, |
| "learning_rate": 9.095435060660595e-09, |
| "logits/chosen": -1.131690502166748, |
| "logits/rejected": -1.1426740884780884, |
| "logps/chosen": -4.285680770874023, |
| "logps/rejected": -5.050602436065674, |
| "loss": 2.6046, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -42.85680389404297, |
| "rewards/margins": 7.64921760559082, |
| "rewards/rejected": -50.50602340698242, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.9409103760248798, |
| "grad_norm": 179.16381227507324, |
| "learning_rate": 8.436510441197864e-09, |
| "logits/chosen": -1.1493927240371704, |
| "logits/rejected": -1.170986533164978, |
| "logps/chosen": -4.301328659057617, |
| "logps/rejected": -5.077293872833252, |
| "loss": 2.7123, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -43.01328659057617, |
| "rewards/margins": 7.759650230407715, |
| "rewards/rejected": -50.77294158935547, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.9431721798134012, |
| "grad_norm": 178.14439119940326, |
| "learning_rate": 7.802105719539076e-09, |
| "logits/chosen": -1.1591538190841675, |
| "logits/rejected": -1.1537411212921143, |
| "logps/chosen": -4.516260147094727, |
| "logps/rejected": -5.185364246368408, |
| "loss": 3.2654, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -45.162601470947266, |
| "rewards/margins": 6.691040992736816, |
| "rewards/rejected": -51.85364532470703, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.9454339836019225, |
| "grad_norm": 143.6671474067502, |
| "learning_rate": 7.1922606224192e-09, |
| "logits/chosen": -1.1799875497817993, |
| "logits/rejected": -1.1871784925460815, |
| "logps/chosen": -4.451123237609863, |
| "logps/rejected": -5.2011637687683105, |
| "loss": 2.2148, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -44.51123046875, |
| "rewards/margins": 7.50040340423584, |
| "rewards/rejected": -52.011634826660156, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.9476957873904439, |
| "grad_norm": 180.34081751631268, |
| "learning_rate": 6.6070133386372906e-09, |
| "logits/chosen": -1.1689175367355347, |
| "logits/rejected": -1.1665769815444946, |
| "logps/chosen": -4.324338912963867, |
| "logps/rejected": -4.959226131439209, |
| "loss": 3.0841, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -43.24338912963867, |
| "rewards/margins": 6.348876953125, |
| "rewards/rejected": -49.59226608276367, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.9499575911789653, |
| "grad_norm": 146.1251333430689, |
| "learning_rate": 6.046400516665384e-09, |
| "logits/chosen": -1.1695650815963745, |
| "logits/rejected": -1.1703170537948608, |
| "logps/chosen": -4.297419548034668, |
| "logps/rejected": -5.06644868850708, |
| "loss": 2.6095, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -42.97419738769531, |
| "rewards/margins": 7.69029426574707, |
| "rewards/rejected": -50.664485931396484, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9522193949674865, |
| "grad_norm": 155.5449189325342, |
| "learning_rate": 5.510457262353396e-09, |
| "logits/chosen": -1.1957755088806152, |
| "logits/rejected": -1.2013041973114014, |
| "logps/chosen": -4.236740589141846, |
| "logps/rejected": -4.945225715637207, |
| "loss": 2.1589, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -42.36740493774414, |
| "rewards/margins": 7.084850311279297, |
| "rewards/rejected": -49.4522590637207, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.9544811987560079, |
| "grad_norm": 138.48331466546585, |
| "learning_rate": 4.9992171367309265e-09, |
| "logits/chosen": -1.1926621198654175, |
| "logits/rejected": -1.1839208602905273, |
| "logps/chosen": -4.090144157409668, |
| "logps/rejected": -4.805689811706543, |
| "loss": 2.4128, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -40.90143966674805, |
| "rewards/margins": 7.155453205108643, |
| "rewards/rejected": -48.0568962097168, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9567430025445293, |
| "grad_norm": 158.48354828470264, |
| "learning_rate": 4.5127121539052955e-09, |
| "logits/chosen": -1.1851263046264648, |
| "logits/rejected": -1.195054292678833, |
| "logps/chosen": -4.540511131286621, |
| "logps/rejected": -5.315212249755859, |
| "loss": 2.4374, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -45.405113220214844, |
| "rewards/margins": 7.747008323669434, |
| "rewards/rejected": -53.152122497558594, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.9590048063330506, |
| "grad_norm": 161.76738943972168, |
| "learning_rate": 4.050972779057327e-09, |
| "logits/chosen": -1.0983150005340576, |
| "logits/rejected": -1.1198445558547974, |
| "logps/chosen": -4.050824165344238, |
| "logps/rejected": -4.785923004150391, |
| "loss": 2.516, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -40.50823974609375, |
| "rewards/margins": 7.350987434387207, |
| "rewards/rejected": -47.859230041503906, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.961266610121572, |
| "grad_norm": 147.16124784211212, |
| "learning_rate": 3.6140279265330477e-09, |
| "logits/chosen": -1.1477775573730469, |
| "logits/rejected": -1.151658058166504, |
| "logps/chosen": -4.37608003616333, |
| "logps/rejected": -5.113864421844482, |
| "loss": 2.3465, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -43.76080322265625, |
| "rewards/margins": 7.377841472625732, |
| "rewards/rejected": -51.13864517211914, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9635284139100933, |
| "grad_norm": 156.27921111967768, |
| "learning_rate": 3.2019049580335853e-09, |
| "logits/chosen": -1.1753405332565308, |
| "logits/rejected": -1.172703504562378, |
| "logps/chosen": -4.146142959594727, |
| "logps/rejected": -4.741412162780762, |
| "loss": 2.9867, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -41.461429595947266, |
| "rewards/margins": 5.952691555023193, |
| "rewards/rejected": -47.41412353515625, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9657902176986146, |
| "grad_norm": 152.1907684077805, |
| "learning_rate": 2.814629680901337e-09, |
| "logits/chosen": -1.2028779983520508, |
| "logits/rejected": -1.2054622173309326, |
| "logps/chosen": -4.460807800292969, |
| "logps/rejected": -5.132265090942383, |
| "loss": 2.6493, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -44.60807418823242, |
| "rewards/margins": 6.714574337005615, |
| "rewards/rejected": -51.32265090942383, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.968052021487136, |
| "grad_norm": 155.37833094133038, |
| "learning_rate": 2.4522263465041937e-09, |
| "logits/chosen": -1.1574562788009644, |
| "logits/rejected": -1.175394058227539, |
| "logps/chosen": -4.284934043884277, |
| "logps/rejected": -5.051609039306641, |
| "loss": 1.8433, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -42.84933853149414, |
| "rewards/margins": 7.666755676269531, |
| "rewards/rejected": -50.51609802246094, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9703138252756573, |
| "grad_norm": 144.34283399834695, |
| "learning_rate": 2.114717648716713e-09, |
| "logits/chosen": -1.1277655363082886, |
| "logits/rejected": -1.1411540508270264, |
| "logps/chosen": -4.294366836547852, |
| "logps/rejected": -5.148277282714844, |
| "loss": 2.0689, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -42.943668365478516, |
| "rewards/margins": 8.539103507995605, |
| "rewards/rejected": -51.4827766418457, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9725756290641787, |
| "grad_norm": 154.89691227614196, |
| "learning_rate": 1.802124722499121e-09, |
| "logits/chosen": -1.1664525270462036, |
| "logits/rejected": -1.1698546409606934, |
| "logps/chosen": -4.270165920257568, |
| "logps/rejected": -5.067376136779785, |
| "loss": 2.4589, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -42.701656341552734, |
| "rewards/margins": 7.97210168838501, |
| "rewards/rejected": -50.67375946044922, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9748374328527001, |
| "grad_norm": 175.9345178956199, |
| "learning_rate": 1.5144671425737499e-09, |
| "logits/chosen": -1.1535289287567139, |
| "logits/rejected": -1.1616544723510742, |
| "logps/chosen": -3.9937241077423096, |
| "logps/rejected": -4.663761138916016, |
| "loss": 3.1278, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -39.93724060058594, |
| "rewards/margins": 6.700366497039795, |
| "rewards/rejected": -46.637611389160156, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9770992366412213, |
| "grad_norm": 159.6613123701801, |
| "learning_rate": 1.251762922199484e-09, |
| "logits/chosen": -1.1003191471099854, |
| "logits/rejected": -1.1101816892623901, |
| "logps/chosen": -4.444840431213379, |
| "logps/rejected": -5.245765209197998, |
| "loss": 1.9898, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -44.448402404785156, |
| "rewards/margins": 8.009248733520508, |
| "rewards/rejected": -52.45764923095703, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9793610404297427, |
| "grad_norm": 132.41596768015958, |
| "learning_rate": 1.0140285120433744e-09, |
| "logits/chosen": -1.1827129125595093, |
| "logits/rejected": -1.1924540996551514, |
| "logps/chosen": -4.455898284912109, |
| "logps/rejected": -5.210501670837402, |
| "loss": 2.519, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -44.558982849121094, |
| "rewards/margins": 7.546034336090088, |
| "rewards/rejected": -52.10501480102539, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.9816228442182641, |
| "grad_norm": 150.85793494364853, |
| "learning_rate": 8.012787991508396e-10, |
| "logits/chosen": -1.147634506225586, |
| "logits/rejected": -1.1722147464752197, |
| "logps/chosen": -4.21061897277832, |
| "logps/rejected": -5.051124572753906, |
| "loss": 2.6106, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -42.1061897277832, |
| "rewards/margins": 8.405055046081543, |
| "rewards/rejected": -50.5112419128418, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9838846480067854, |
| "grad_norm": 128.47098525306382, |
| "learning_rate": 6.135271060133007e-10, |
| "logits/chosen": -1.1236947774887085, |
| "logits/rejected": -1.1270819902420044, |
| "logps/chosen": -4.134029865264893, |
| "logps/rejected": -4.879709243774414, |
| "loss": 2.4142, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -41.34030532836914, |
| "rewards/margins": 7.456791400909424, |
| "rewards/rejected": -48.797096252441406, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9861464517953068, |
| "grad_norm": 151.15908258364348, |
| "learning_rate": 4.50785189733871e-10, |
| "logits/chosen": -1.1433157920837402, |
| "logits/rejected": -1.1700868606567383, |
| "logps/chosen": -4.154694557189941, |
| "logps/rejected": -4.998684883117676, |
| "loss": 2.2485, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -41.54694366455078, |
| "rewards/margins": 8.439903259277344, |
| "rewards/rejected": -49.986846923828125, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.988408255583828, |
| "grad_norm": 143.0805408958406, |
| "learning_rate": 3.1306324129118935e-10, |
| "logits/chosen": -1.1328377723693848, |
| "logits/rejected": -1.1593248844146729, |
| "logps/chosen": -4.32177209854126, |
| "logps/rejected": -5.004055023193359, |
| "loss": 2.586, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -43.21772766113281, |
| "rewards/margins": 6.822832107543945, |
| "rewards/rejected": -50.040550231933594, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.9906700593723494, |
| "grad_norm": 183.31095051381905, |
| "learning_rate": 2.003698849011748e-10, |
| "logits/chosen": -1.195257306098938, |
| "logits/rejected": -1.1942667961120605, |
| "logps/chosen": -4.546604156494141, |
| "logps/rejected": -5.205718517303467, |
| "loss": 2.5836, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -45.46604537963867, |
| "rewards/margins": 6.591144561767578, |
| "rewards/rejected": -52.057186126708984, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.9929318631608708, |
| "grad_norm": 156.79957243989656, |
| "learning_rate": 1.1271217747714779e-10, |
| "logits/chosen": -1.1826034784317017, |
| "logits/rejected": -1.2057559490203857, |
| "logps/chosen": -4.436058521270752, |
| "logps/rejected": -5.084158897399902, |
| "loss": 2.8188, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -44.3605842590332, |
| "rewards/margins": 6.481000900268555, |
| "rewards/rejected": -50.84158706665039, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.9951936669493922, |
| "grad_norm": 154.28655412323974, |
| "learning_rate": 5.0095608187739055e-11, |
| "logits/chosen": -1.151612639427185, |
| "logits/rejected": -1.1684077978134155, |
| "logps/chosen": -4.0718464851379395, |
| "logps/rejected": -4.774002552032471, |
| "loss": 2.6165, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -40.71846389770508, |
| "rewards/margins": 7.021560192108154, |
| "rewards/rejected": -47.74002456665039, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9974554707379135, |
| "grad_norm": 170.7047305284887, |
| "learning_rate": 1.2524098113209092e-11, |
| "logits/chosen": -1.1892815828323364, |
| "logits/rejected": -1.1911449432373047, |
| "logps/chosen": -4.447210788726807, |
| "logps/rejected": -5.045917987823486, |
| "loss": 3.3375, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -44.472110748291016, |
| "rewards/margins": 5.987071990966797, |
| "rewards/rejected": -50.45918273925781, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.9997172745264349, |
| "grad_norm": 162.25573622129824, |
| "learning_rate": 0.0, |
| "logits/chosen": -1.1888779401779175, |
| "logits/rejected": -1.1964941024780273, |
| "logps/chosen": -4.214119911193848, |
| "logps/rejected": -4.939702987670898, |
| "loss": 2.7514, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -42.14120101928711, |
| "rewards/margins": 7.255833148956299, |
| "rewards/rejected": -49.39703369140625, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9997172745264349, |
| "eval_logits/chosen": -1.1595183610916138, |
| "eval_logits/rejected": -1.1711369752883911, |
| "eval_logps/chosen": -4.307417392730713, |
| "eval_logps/rejected": -5.051736831665039, |
| "eval_loss": 2.3919546604156494, |
| "eval_rewards/accuracies": 0.8038102388381958, |
| "eval_rewards/chosen": -43.07417297363281, |
| "eval_rewards/margins": 7.443192958831787, |
| "eval_rewards/rejected": -50.517372131347656, |
| "eval_runtime": 100.8538, |
| "eval_samples_per_second": 29.538, |
| "eval_steps_per_second": 1.854, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9997172745264349, |
| "step": 442, |
| "total_flos": 134366991482880.0, |
| "train_loss": 3.293350306571339, |
| "train_runtime": 7617.916, |
| "train_samples_per_second": 7.429, |
| "train_steps_per_second": 0.058 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 442, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 134366991482880.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|