| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9871794871794872, |
| "eval_steps": 500, |
| "global_step": 699, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.021367521367521368, |
| "grad_norm": 16.125, |
| "learning_rate": 1.7556090538745385e-06, |
| "logits/chosen": -3.5723044872283936, |
| "logits/rejected": -3.5033211708068848, |
| "logps/chosen": -41.10147476196289, |
| "logps/rejected": -79.84379577636719, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.3291666507720947, |
| "rewards/chosen": 0.0008107174071483314, |
| "rewards/margins": 0.0017922676634043455, |
| "rewards/rejected": -0.0009815500816330314, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.042735042735042736, |
| "grad_norm": 16.75, |
| "learning_rate": 3.950120371217711e-06, |
| "logits/chosen": -3.5865039825439453, |
| "logits/rejected": -3.506371021270752, |
| "logps/chosen": -40.01554870605469, |
| "logps/rejected": -78.2536849975586, |
| "loss": 0.679, |
| "rewards/accuracies": 0.8291667103767395, |
| "rewards/chosen": -0.0026048908475786448, |
| "rewards/margins": 0.028653645887970924, |
| "rewards/rejected": -0.031258534640073776, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0641025641025641, |
| "grad_norm": 14.6875, |
| "learning_rate": 6.144631688560886e-06, |
| "logits/chosen": -3.5993950366973877, |
| "logits/rejected": -3.5078125, |
| "logps/chosen": -39.8319206237793, |
| "logps/rejected": -81.44599151611328, |
| "loss": 0.6296, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.006084696389734745, |
| "rewards/margins": 0.1335393637418747, |
| "rewards/rejected": -0.13962405920028687, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.08547008547008547, |
| "grad_norm": 13.75, |
| "learning_rate": 8.339143005904057e-06, |
| "logits/chosen": -3.5526351928710938, |
| "logits/rejected": -3.4919040203094482, |
| "logps/chosen": -39.764862060546875, |
| "logps/rejected": -80.73335266113281, |
| "loss": 0.5434, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -0.015795540064573288, |
| "rewards/margins": 0.33912044763565063, |
| "rewards/rejected": -0.3549160361289978, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.10683760683760683, |
| "grad_norm": 11.1875, |
| "learning_rate": 1.0533654323247232e-05, |
| "logits/chosen": -3.5133590698242188, |
| "logits/rejected": -3.472569227218628, |
| "logps/chosen": -40.0025749206543, |
| "logps/rejected": -85.53997802734375, |
| "loss": 0.3966, |
| "rewards/accuracies": 0.9791666865348816, |
| "rewards/chosen": -0.04189577326178551, |
| "rewards/margins": 0.7760334014892578, |
| "rewards/rejected": -0.8179291486740112, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1282051282051282, |
| "grad_norm": 7.59375, |
| "learning_rate": 1.2728165640590407e-05, |
| "logits/chosen": -3.44189190864563, |
| "logits/rejected": -3.4465813636779785, |
| "logps/chosen": -42.446632385253906, |
| "logps/rejected": -93.0277328491211, |
| "loss": 0.2592, |
| "rewards/accuracies": 0.9958332777023315, |
| "rewards/chosen": -0.1323380172252655, |
| "rewards/margins": 1.4258702993392944, |
| "rewards/rejected": -1.5582085847854614, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14957264957264957, |
| "grad_norm": 4.28125, |
| "learning_rate": 1.4922676957933578e-05, |
| "logits/chosen": -3.223677158355713, |
| "logits/rejected": -3.2687485218048096, |
| "logps/chosen": -44.17692947387695, |
| "logps/rejected": -108.72818756103516, |
| "loss": 0.1195, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.43831247091293335, |
| "rewards/margins": 2.755509853363037, |
| "rewards/rejected": -3.1938223838806152, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.17094017094017094, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.5360556888469565e-05, |
| "logits/chosen": -2.780639410018921, |
| "logits/rejected": -2.814786911010742, |
| "logps/chosen": -52.43251419067383, |
| "logps/rejected": -143.02699279785156, |
| "loss": 0.0425, |
| "rewards/accuracies": 0.9916666746139526, |
| "rewards/chosen": -1.2548245191574097, |
| "rewards/margins": 5.2223920822143555, |
| "rewards/rejected": -6.477217197418213, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.19230769230769232, |
| "grad_norm": 1.6953125, |
| "learning_rate": 1.535640428282884e-05, |
| "logits/chosen": -2.3407504558563232, |
| "logits/rejected": -2.3020548820495605, |
| "logps/chosen": -51.26860427856445, |
| "logps/rejected": -161.84654235839844, |
| "loss": 0.0168, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -1.2653954029083252, |
| "rewards/margins": 7.260884761810303, |
| "rewards/rejected": -8.526280403137207, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.21367521367521367, |
| "grad_norm": 9.3125, |
| "learning_rate": 1.5349059809872097e-05, |
| "logits/chosen": -1.9962679147720337, |
| "logits/rejected": -1.9376245737075806, |
| "logps/chosen": -57.276153564453125, |
| "logps/rejected": -180.43075561523438, |
| "loss": 0.0199, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -1.6868196725845337, |
| "rewards/margins": 8.579647064208984, |
| "rewards/rejected": -10.266467094421387, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.23504273504273504, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.5338527542732884e-05, |
| "logits/chosen": -1.9884917736053467, |
| "logits/rejected": -1.9206396341323853, |
| "logps/chosen": -58.84391403198242, |
| "logps/rejected": -196.67623901367188, |
| "loss": 0.009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.8948497772216797, |
| "rewards/margins": 9.876777648925781, |
| "rewards/rejected": -11.771627426147461, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 5.25, |
| "learning_rate": 1.532481332244717e-05, |
| "logits/chosen": -1.9375540018081665, |
| "logits/rejected": -1.8217322826385498, |
| "logps/chosen": -57.795997619628906, |
| "logps/rejected": -198.72848510742188, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.8656346797943115, |
| "rewards/margins": 10.204580307006836, |
| "rewards/rejected": -12.070215225219727, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2777777777777778, |
| "grad_norm": 16.125, |
| "learning_rate": 1.5307924754713968e-05, |
| "logits/chosen": -1.9060790538787842, |
| "logits/rejected": -1.8332831859588623, |
| "logps/chosen": -58.628753662109375, |
| "logps/rejected": -194.50588989257812, |
| "loss": 0.0115, |
| "rewards/accuracies": 0.9916666746139526, |
| "rewards/chosen": -1.797798752784729, |
| "rewards/margins": 9.891462326049805, |
| "rewards/rejected": -11.689262390136719, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.29914529914529914, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 1.528787120567736e-05, |
| "logits/chosen": -1.974802017211914, |
| "logits/rejected": -1.877560019493103, |
| "logps/chosen": -52.279327392578125, |
| "logps/rejected": -189.9191131591797, |
| "loss": 0.0049, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2973945140838623, |
| "rewards/margins": 10.00140380859375, |
| "rewards/rejected": -11.298797607421875, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.32051282051282054, |
| "grad_norm": 0.640625, |
| "learning_rate": 1.526466379673215e-05, |
| "logits/chosen": -1.9735548496246338, |
| "logits/rejected": -1.870365858078003, |
| "logps/chosen": -53.06673049926758, |
| "logps/rejected": -199.06517028808594, |
| "loss": 0.0071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3975543975830078, |
| "rewards/margins": 10.824033737182617, |
| "rewards/rejected": -12.221589088439941, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.3418803418803419, |
| "grad_norm": 10.5, |
| "learning_rate": 1.5238315398356126e-05, |
| "logits/chosen": -1.9223344326019287, |
| "logits/rejected": -1.7829482555389404, |
| "logps/chosen": -58.65262985229492, |
| "logps/rejected": -205.0301055908203, |
| "loss": 0.0075, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -1.9159812927246094, |
| "rewards/margins": 10.972046852111816, |
| "rewards/rejected": -12.888028144836426, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.36324786324786323, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 1.5208840622972272e-05, |
| "logits/chosen": -1.9187581539154053, |
| "logits/rejected": -1.742376685142517, |
| "logps/chosen": -60.388633728027344, |
| "logps/rejected": -209.2400360107422, |
| "loss": 0.0215, |
| "rewards/accuracies": 0.9916666746139526, |
| "rewards/chosen": -1.904120683670044, |
| "rewards/margins": 11.195540428161621, |
| "rewards/rejected": -13.099660873413086, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 0.97265625, |
| "learning_rate": 1.5176255816844948e-05, |
| "logits/chosen": -1.870398759841919, |
| "logits/rejected": -1.6664142608642578, |
| "logps/chosen": -52.21706008911133, |
| "logps/rejected": -206.40274047851562, |
| "loss": 0.0065, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3197202682495117, |
| "rewards/margins": 11.56867504119873, |
| "rewards/rejected": -12.888395309448242, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.405982905982906, |
| "grad_norm": 0.609375, |
| "learning_rate": 1.5140579051014502e-05, |
| "logits/chosen": -1.8215786218643188, |
| "logits/rejected": -1.587632179260254, |
| "logps/chosen": -56.239166259765625, |
| "logps/rejected": -221.23486328125, |
| "loss": 0.0027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6193969249725342, |
| "rewards/margins": 12.424324035644531, |
| "rewards/rejected": -14.043721199035645, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.42735042735042733, |
| "grad_norm": 0.19140625, |
| "learning_rate": 1.5101830111275334e-05, |
| "logits/chosen": -1.761718511581421, |
| "logits/rejected": -1.5578025579452515, |
| "logps/chosen": -58.93622970581055, |
| "logps/rejected": -211.36245727539062, |
| "loss": 0.0134, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -1.9031003713607788, |
| "rewards/margins": 11.43458366394043, |
| "rewards/rejected": -13.337686538696289, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.44871794871794873, |
| "grad_norm": 5.25, |
| "learning_rate": 1.5060030487203004e-05, |
| "logits/chosen": -1.7170766592025757, |
| "logits/rejected": -1.4669135808944702, |
| "logps/chosen": -59.824798583984375, |
| "logps/rejected": -224.89785766601562, |
| "loss": 0.0058, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -1.9724791049957275, |
| "rewards/margins": 12.608641624450684, |
| "rewards/rejected": -14.581120491027832, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4700854700854701, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 1.501520336023643e-05, |
| "logits/chosen": -1.6807842254638672, |
| "logits/rejected": -1.377798318862915, |
| "logps/chosen": -60.823036193847656, |
| "logps/rejected": -220.12026977539062, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.0682151317596436, |
| "rewards/margins": 12.238883972167969, |
| "rewards/rejected": -14.307100296020508, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.49145299145299143, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.4967373590821828e-05, |
| "logits/chosen": -1.6894537210464478, |
| "logits/rejected": -1.387304425239563, |
| "logps/chosen": -70.60054779052734, |
| "logps/rejected": -224.40921020507812, |
| "loss": 0.0135, |
| "rewards/accuracies": 0.9916666746139526, |
| "rewards/chosen": -3.0002286434173584, |
| "rewards/margins": 11.61396598815918, |
| "rewards/rejected": -14.6141939163208, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 0.07080078125, |
| "learning_rate": 1.491656770462546e-05, |
| "logits/chosen": -1.549338936805725, |
| "logits/rejected": -1.1871496438980103, |
| "logps/chosen": -62.74003219604492, |
| "logps/rejected": -226.72341918945312, |
| "loss": 0.0031, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.2881884574890137, |
| "rewards/margins": 12.516637802124023, |
| "rewards/rejected": -14.804829597473145, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5341880341880342, |
| "grad_norm": 9.5625, |
| "learning_rate": 1.4862813877822923e-05, |
| "logits/chosen": -1.5509467124938965, |
| "logits/rejected": -1.188998818397522, |
| "logps/chosen": -58.65150833129883, |
| "logps/rejected": -226.0467071533203, |
| "loss": 0.0104, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -1.832358956336975, |
| "rewards/margins": 13.06682300567627, |
| "rewards/rejected": -14.89918327331543, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.1904296875, |
| "learning_rate": 1.4806141921473063e-05, |
| "logits/chosen": -1.520416498184204, |
| "logits/rejected": -1.0859997272491455, |
| "logps/chosen": -57.10784149169922, |
| "logps/rejected": -232.3367156982422, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6952478885650635, |
| "rewards/margins": 13.670125961303711, |
| "rewards/rejected": -15.365373611450195, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5769230769230769, |
| "grad_norm": 0.0791015625, |
| "learning_rate": 1.4746583264985202e-05, |
| "logits/chosen": -1.515005350112915, |
| "logits/rejected": -1.0654128789901733, |
| "logps/chosen": -57.121559143066406, |
| "logps/rejected": -238.92703247070312, |
| "loss": 0.0058, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -1.67409348487854, |
| "rewards/margins": 14.308405876159668, |
| "rewards/rejected": -15.982501029968262, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.5982905982905983, |
| "grad_norm": 0.033203125, |
| "learning_rate": 1.468417093868888e-05, |
| "logits/chosen": -1.547180414199829, |
| "logits/rejected": -1.1224654912948608, |
| "logps/chosen": -55.69971466064453, |
| "logps/rejected": -235.5997772216797, |
| "loss": 0.004, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -1.6552823781967163, |
| "rewards/margins": 14.057337760925293, |
| "rewards/rejected": -15.712621688842773, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6196581196581197, |
| "grad_norm": 0.09326171875, |
| "learning_rate": 1.4618939555515721e-05, |
| "logits/chosen": -1.4791333675384521, |
| "logits/rejected": -1.0679481029510498, |
| "logps/chosen": -60.25483322143555, |
| "logps/rejected": -235.65640258789062, |
| "loss": 0.0038, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -2.0124049186706543, |
| "rewards/margins": 13.800189018249512, |
| "rewards/rejected": -15.812593460083008, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6410256410256411, |
| "grad_norm": 0.0203857421875, |
| "learning_rate": 1.455092529180363e-05, |
| "logits/chosen": -1.5142757892608643, |
| "logits/rejected": -1.0721313953399658, |
| "logps/chosen": -58.559837341308594, |
| "logps/rejected": -238.7608184814453, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9865844249725342, |
| "rewards/margins": 14.183688163757324, |
| "rewards/rejected": -16.170270919799805, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6623931623931624, |
| "grad_norm": 4.375, |
| "learning_rate": 1.4480165867233946e-05, |
| "logits/chosen": -1.5100654363632202, |
| "logits/rejected": -1.0968310832977295, |
| "logps/chosen": -63.598304748535156, |
| "logps/rejected": -246.39779663085938, |
| "loss": 0.0081, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -2.3885955810546875, |
| "rewards/margins": 14.358621597290039, |
| "rewards/rejected": -16.747217178344727, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.6837606837606838, |
| "grad_norm": 6.375, |
| "learning_rate": 1.440670052391267e-05, |
| "logits/chosen": -1.5425626039505005, |
| "logits/rejected": -1.1030757427215576, |
| "logps/chosen": -59.99821090698242, |
| "logps/rejected": -236.77932739257812, |
| "loss": 0.0057, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -2.146557569503784, |
| "rewards/margins": 13.929641723632812, |
| "rewards/rejected": -16.07619857788086, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7051282051282052, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.4330570004607398e-05, |
| "logits/chosen": -1.5993397235870361, |
| "logits/rejected": -1.1524969339370728, |
| "logps/chosen": -58.561241149902344, |
| "logps/rejected": -247.20388793945312, |
| "loss": 0.0033, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -1.9490623474121094, |
| "rewards/margins": 14.975687026977539, |
| "rewards/rejected": -16.924747467041016, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.7264957264957265, |
| "grad_norm": 1.0, |
| "learning_rate": 1.4251816530151986e-05, |
| "logits/chosen": -1.550431489944458, |
| "logits/rejected": -1.151962399482727, |
| "logps/chosen": -62.471397399902344, |
| "logps/rejected": -246.4335479736328, |
| "loss": 0.0038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.2203402519226074, |
| "rewards/margins": 14.611509323120117, |
| "rewards/rejected": -16.831850051879883, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7478632478632479, |
| "grad_norm": 0.10791015625, |
| "learning_rate": 1.4170483776031526e-05, |
| "logits/chosen": -1.5044711828231812, |
| "logits/rejected": -1.0406298637390137, |
| "logps/chosen": -64.19065856933594, |
| "logps/rejected": -249.743408203125, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.455766201019287, |
| "rewards/margins": 14.67004680633545, |
| "rewards/rejected": -17.125812530517578, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.30078125, |
| "learning_rate": 1.4086616848160574e-05, |
| "logits/chosen": -1.512284278869629, |
| "logits/rejected": -1.0526224374771118, |
| "logps/chosen": -68.66128540039062, |
| "logps/rejected": -245.33120727539062, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.8888022899627686, |
| "rewards/margins": 13.867166519165039, |
| "rewards/rejected": -16.75596809387207, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7905982905982906, |
| "grad_norm": 0.1064453125, |
| "learning_rate": 1.4000262257868096e-05, |
| "logits/chosen": -1.442132830619812, |
| "logits/rejected": -0.9388168454170227, |
| "logps/chosen": -61.00693893432617, |
| "logps/rejected": -247.60067749023438, |
| "loss": 0.0022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.1700937747955322, |
| "rewards/margins": 14.933375358581543, |
| "rewards/rejected": -17.103466033935547, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.811965811965812, |
| "grad_norm": 0.65625, |
| "learning_rate": 1.3911467896102994e-05, |
| "logits/chosen": -1.3841662406921387, |
| "logits/rejected": -0.9196082353591919, |
| "logps/chosen": -56.78447341918945, |
| "logps/rejected": -246.0129852294922, |
| "loss": 0.0049, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -1.7434440851211548, |
| "rewards/margins": 15.205429077148438, |
| "rewards/rejected": -16.94887351989746, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 0.047119140625, |
| "learning_rate": 1.3820283006874503e-05, |
| "logits/chosen": -1.36122727394104, |
| "logits/rejected": -0.899361789226532, |
| "logps/chosen": -64.0156478881836, |
| "logps/rejected": -245.7220458984375, |
| "loss": 0.0077, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -2.193249464035034, |
| "rewards/margins": 14.599599838256836, |
| "rewards/rejected": -16.792850494384766, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.8547008547008547, |
| "grad_norm": 0.220703125, |
| "learning_rate": 1.372675815994221e-05, |
| "logits/chosen": -1.2733592987060547, |
| "logits/rejected": -0.7683244347572327, |
| "logps/chosen": -54.458702087402344, |
| "logps/rejected": -250.61141967773438, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6900008916854858, |
| "rewards/margins": 15.71391773223877, |
| "rewards/rejected": -17.403919219970703, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8760683760683761, |
| "grad_norm": 41.75, |
| "learning_rate": 1.3630945222770829e-05, |
| "logits/chosen": -1.3039919137954712, |
| "logits/rejected": -0.7636764645576477, |
| "logps/chosen": -61.440521240234375, |
| "logps/rejected": -256.41033935546875, |
| "loss": 0.0309, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -2.126218795776367, |
| "rewards/margins": 15.685290336608887, |
| "rewards/rejected": -17.811508178710938, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.8974358974358975, |
| "grad_norm": 0.8046875, |
| "learning_rate": 1.3532897331765301e-05, |
| "logits/chosen": -1.339553713798523, |
| "logits/rejected": -0.7716963291168213, |
| "logps/chosen": -62.3395881652832, |
| "logps/rejected": -254.98672485351562, |
| "loss": 0.0111, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -2.235722064971924, |
| "rewards/margins": 15.472686767578125, |
| "rewards/rejected": -17.70840835571289, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9188034188034188, |
| "grad_norm": 0.314453125, |
| "learning_rate": 1.3432668862802134e-05, |
| "logits/chosen": -1.256614089012146, |
| "logits/rejected": -0.6890861988067627, |
| "logps/chosen": -62.10491943359375, |
| "logps/rejected": -249.8031005859375, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.166273593902588, |
| "rewards/margins": 14.911642074584961, |
| "rewards/rejected": -17.07791519165039, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.9401709401709402, |
| "grad_norm": 0.68359375, |
| "learning_rate": 1.3330315401073371e-05, |
| "logits/chosen": -1.2784340381622314, |
| "logits/rejected": -0.7194468379020691, |
| "logps/chosen": -64.89430236816406, |
| "logps/rejected": -246.3287811279297, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.396063804626465, |
| "rewards/margins": 14.473241806030273, |
| "rewards/rejected": -16.869304656982422, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9615384615384616, |
| "grad_norm": 11.5625, |
| "learning_rate": 1.3225893710259887e-05, |
| "logits/chosen": -1.1662753820419312, |
| "logits/rejected": -0.5432911515235901, |
| "logps/chosen": -66.24633026123047, |
| "logps/rejected": -253.39212036132812, |
| "loss": 0.01, |
| "rewards/accuracies": 0.9916666746139526, |
| "rewards/chosen": -2.7053072452545166, |
| "rewards/margins": 14.921958923339844, |
| "rewards/rejected": -17.62726402282715, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.9829059829059829, |
| "grad_norm": 1.5859375, |
| "learning_rate": 1.3119461701051105e-05, |
| "logits/chosen": -1.161176323890686, |
| "logits/rejected": -0.45327743887901306, |
| "logps/chosen": -62.70893096923828, |
| "logps/rejected": -251.8865966796875, |
| "loss": 0.0009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.506155252456665, |
| "rewards/margins": 15.214022636413574, |
| "rewards/rejected": -17.720178604125977, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.9957264957264957, |
| "eval_logits/chosen": -1.1960320472717285, |
| "eval_logits/rejected": -0.4940774440765381, |
| "eval_logps/chosen": -66.11089324951172, |
| "eval_logps/rejected": -261.5491943359375, |
| "eval_loss": 0.0009878784185275435, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -2.6667160987854004, |
| "eval_rewards/margins": 15.686513900756836, |
| "eval_rewards/rejected": -18.353229522705078, |
| "eval_runtime": 9.4343, |
| "eval_samples_per_second": 21.199, |
| "eval_steps_per_second": 21.199, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.0042735042735043, |
| "grad_norm": 0.11669921875, |
| "learning_rate": 1.3011078399028605e-05, |
| "logits/chosen": -1.1641021966934204, |
| "logits/rejected": -0.48939043283462524, |
| "logps/chosen": -65.07087707519531, |
| "logps/rejected": -266.9398498535156, |
| "loss": 0.0032, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -2.5494394302368164, |
| "rewards/margins": 16.291004180908203, |
| "rewards/rejected": -18.840442657470703, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.0256410256410255, |
| "grad_norm": 0.130859375, |
| "learning_rate": 1.2900803911931431e-05, |
| "logits/chosen": -1.1983628273010254, |
| "logits/rejected": -0.48308929800987244, |
| "logps/chosen": -64.48258209228516, |
| "logps/rejected": -265.9141540527344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.472846746444702, |
| "rewards/margins": 16.26525115966797, |
| "rewards/rejected": -18.73809814453125, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.047008547008547, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 1.2788699396321252e-05, |
| "logits/chosen": -1.1283172369003296, |
| "logits/rejected": -0.419720321893692, |
| "logps/chosen": -62.42974853515625, |
| "logps/rejected": -256.7882385253906, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.3851864337921143, |
| "rewards/margins": 15.727444648742676, |
| "rewards/rejected": -18.11263084411621, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.0683760683760684, |
| "grad_norm": 0.000896453857421875, |
| "learning_rate": 1.2674827023665853e-05, |
| "logits/chosen": -1.1639864444732666, |
| "logits/rejected": -0.4902980327606201, |
| "logps/chosen": -67.04655456542969, |
| "logps/rejected": -274.08026123046875, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.747849941253662, |
| "rewards/margins": 16.89676284790039, |
| "rewards/rejected": -19.64461326599121, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0897435897435896, |
| "grad_norm": 0.208984375, |
| "learning_rate": 1.255924994585978e-05, |
| "logits/chosen": -1.1226640939712524, |
| "logits/rejected": -0.4087928235530853, |
| "logps/chosen": -69.7280044555664, |
| "logps/rejected": -270.5373840332031, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.9504196643829346, |
| "rewards/margins": 16.320491790771484, |
| "rewards/rejected": -19.27090835571289, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 0.00885009765625, |
| "learning_rate": 1.2442032260201255e-05, |
| "logits/chosen": -1.1412135362625122, |
| "logits/rejected": -0.4527861475944519, |
| "logps/chosen": -69.53722381591797, |
| "logps/rejected": -270.5766296386719, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.999143123626709, |
| "rewards/margins": 16.23387336730957, |
| "rewards/rejected": -19.233016967773438, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.1324786324786325, |
| "grad_norm": 0.263671875, |
| "learning_rate": 1.2323238973844796e-05, |
| "logits/chosen": -1.1780011653900146, |
| "logits/rejected": -0.45843830704689026, |
| "logps/chosen": -71.02696228027344, |
| "logps/rejected": -280.1233825683594, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.0659804344177246, |
| "rewards/margins": 16.907106399536133, |
| "rewards/rejected": -19.973085403442383, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.1538461538461537, |
| "grad_norm": 0.02197265625, |
| "learning_rate": 1.2202935967749212e-05, |
| "logits/chosen": -1.0917080640792847, |
| "logits/rejected": -0.38812121748924255, |
| "logps/chosen": -74.89707946777344, |
| "logps/rejected": -274.8170166015625, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.3719239234924316, |
| "rewards/margins": 16.13779067993164, |
| "rewards/rejected": -19.509714126586914, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.1752136752136753, |
| "grad_norm": 0.72265625, |
| "learning_rate": 1.2081189960141038e-05, |
| "logits/chosen": -1.1228703260421753, |
| "logits/rejected": -0.39134496450424194, |
| "logps/chosen": -69.5420913696289, |
| "logps/rejected": -274.7674255371094, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.9194247722625732, |
| "rewards/margins": 16.717750549316406, |
| "rewards/rejected": -19.637174606323242, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.1965811965811965, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.1958068469513604e-05, |
| "logits/chosen": -1.1384648084640503, |
| "logits/rejected": -0.39668112993240356, |
| "logps/chosen": -72.6049575805664, |
| "logps/rejected": -283.6707458496094, |
| "loss": 0.0092, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -3.210206985473633, |
| "rewards/margins": 17.243507385253906, |
| "rewards/rejected": -20.453712463378906, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.217948717948718, |
| "grad_norm": 0.013916015625, |
| "learning_rate": 1.1833639777182316e-05, |
| "logits/chosen": -1.0187714099884033, |
| "logits/rejected": -0.2573033571243286, |
| "logps/chosen": -70.54331970214844, |
| "logps/rejected": -280.8934020996094, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.204702377319336, |
| "rewards/margins": 17.17652702331543, |
| "rewards/rejected": -20.3812313079834, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.2393162393162394, |
| "grad_norm": 0.012451171875, |
| "learning_rate": 1.170797288941685e-05, |
| "logits/chosen": -0.9472154378890991, |
| "logits/rejected": -0.2180492877960205, |
| "logps/chosen": -79.2474594116211, |
| "logps/rejected": -289.6960144042969, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.919898271560669, |
| "rewards/margins": 17.01470947265625, |
| "rewards/rejected": -20.934606552124023, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.2606837606837606, |
| "grad_norm": 0.3359375, |
| "learning_rate": 1.1581137499171342e-05, |
| "logits/chosen": -0.9705740213394165, |
| "logits/rejected": -0.21009401977062225, |
| "logps/chosen": -79.57368469238281, |
| "logps/rejected": -278.9507751464844, |
| "loss": 0.0025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.9748988151550293, |
| "rewards/margins": 16.132137298583984, |
| "rewards/rejected": -20.10703468322754, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.282051282051282, |
| "grad_norm": 0.017822265625, |
| "learning_rate": 1.145320394743371e-05, |
| "logits/chosen": -0.9654294848442078, |
| "logits/rejected": -0.22467419505119324, |
| "logps/chosen": -74.53304290771484, |
| "logps/rejected": -270.533447265625, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.5043253898620605, |
| "rewards/margins": 15.963613510131836, |
| "rewards/rejected": -19.467941284179688, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3034188034188035, |
| "grad_norm": 0.0078125, |
| "learning_rate": 1.1324243184215622e-05, |
| "logits/chosen": -0.969558835029602, |
| "logits/rejected": -0.21970291435718536, |
| "logps/chosen": -73.47796630859375, |
| "logps/rejected": -285.55706787109375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.5001537799835205, |
| "rewards/margins": 17.251827239990234, |
| "rewards/rejected": -20.75197982788086, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.3247863247863247, |
| "grad_norm": 0.0084228515625, |
| "learning_rate": 1.1194326729204686e-05, |
| "logits/chosen": -1.0194365978240967, |
| "logits/rejected": -0.25868645310401917, |
| "logps/chosen": -73.23660278320312, |
| "logps/rejected": -280.81256103515625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.4368844032287598, |
| "rewards/margins": 16.979907989501953, |
| "rewards/rejected": -20.416793823242188, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.3461538461538463, |
| "grad_norm": 0.00933837890625, |
| "learning_rate": 1.1063526632100717e-05, |
| "logits/chosen": -1.0158953666687012, |
| "logits/rejected": -0.3026788830757141, |
| "logps/chosen": -75.67848205566406, |
| "logps/rejected": -277.49310302734375, |
| "loss": 0.0025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.546459197998047, |
| "rewards/margins": 16.51190948486328, |
| "rewards/rejected": -20.058368682861328, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.3675213675213675, |
| "grad_norm": 0.005615234375, |
| "learning_rate": 1.0931915432658055e-05, |
| "logits/chosen": -1.023720383644104, |
| "logits/rejected": -0.28601619601249695, |
| "logps/chosen": -71.98751068115234, |
| "logps/rejected": -279.75103759765625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.2338900566101074, |
| "rewards/margins": 16.945287704467773, |
| "rewards/rejected": -20.17917823791504, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 0.09130859375, |
| "learning_rate": 1.0799566120456133e-05, |
| "logits/chosen": -1.0158692598342896, |
| "logits/rejected": -0.25889790058135986, |
| "logps/chosen": -74.53166198730469, |
| "logps/rejected": -277.46502685546875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.4598422050476074, |
| "rewards/margins": 16.516708374023438, |
| "rewards/rejected": -19.976551055908203, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.4102564102564101, |
| "grad_norm": 0.0211181640625, |
| "learning_rate": 1.066655209442054e-05, |
| "logits/chosen": -1.0357223749160767, |
| "logits/rejected": -0.3092297315597534, |
| "logps/chosen": -74.45147705078125, |
| "logps/rejected": -280.28125, |
| "loss": 0.0027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.444270372390747, |
| "rewards/margins": 16.66710090637207, |
| "rewards/rejected": -20.111371994018555, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.4316239316239316, |
| "grad_norm": 0.033447265625, |
| "learning_rate": 1.0532947122117101e-05, |
| "logits/chosen": -1.0367907285690308, |
| "logits/rejected": -0.30450788140296936, |
| "logps/chosen": -74.00047302246094, |
| "logps/rejected": -280.2533264160156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.4167351722717285, |
| "rewards/margins": 16.851306915283203, |
| "rewards/rejected": -20.268043518066406, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.452991452991453, |
| "grad_norm": 0.05126953125, |
| "learning_rate": 1.0398825298841499e-05, |
| "logits/chosen": -1.0600817203521729, |
| "logits/rejected": -0.41858386993408203, |
| "logps/chosen": -77.15374755859375, |
| "logps/rejected": -283.68585205078125, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.601487636566162, |
| "rewards/margins": 16.883991241455078, |
| "rewards/rejected": -20.4854793548584, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.4743589743589745, |
| "grad_norm": 0.0087890625, |
| "learning_rate": 1.0264261006527144e-05, |
| "logits/chosen": -1.1149675846099854, |
| "logits/rejected": -0.4605252742767334, |
| "logps/chosen": -73.10884094238281, |
| "logps/rejected": -273.56329345703125, |
| "loss": 0.0043, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -3.302705764770508, |
| "rewards/margins": 16.43352508544922, |
| "rewards/rejected": -19.73623275756836, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.4957264957264957, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 1.0129328872494075e-05, |
| "logits/chosen": -1.2343705892562866, |
| "logits/rejected": -0.5823469161987305, |
| "logps/chosen": -70.9728775024414, |
| "logps/rejected": -277.93389892578125, |
| "loss": 0.0029, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -3.130044460296631, |
| "rewards/margins": 16.93770980834961, |
| "rewards/rejected": -20.067752838134766, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.517094017094017, |
| "grad_norm": 0.166015625, |
| "learning_rate": 9.994103728061786e-06, |
| "logits/chosen": -1.1977897882461548, |
| "logits/rejected": -0.5562113523483276, |
| "logps/chosen": -70.82041931152344, |
| "logps/rejected": -274.3385009765625, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.062682867050171, |
| "rewards/margins": 16.56722640991211, |
| "rewards/rejected": -19.62990951538086, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 0.00537109375, |
| "learning_rate": 9.858660567048902e-06, |
| "logits/chosen": -1.2067844867706299, |
| "logits/rejected": -0.5067782998085022, |
| "logps/chosen": -74.36605834960938, |
| "logps/rejected": -289.45306396484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.4267430305480957, |
| "rewards/margins": 17.64672088623047, |
| "rewards/rejected": -21.073461532592773, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.5598290598290598, |
| "grad_norm": 0.10302734375, |
| "learning_rate": 9.72307450418274e-06, |
| "logits/chosen": -1.2308647632598877, |
| "logits/rejected": -0.5679563283920288, |
| "logps/chosen": -69.85784912109375, |
| "logps/rejected": -274.14263916015625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.9802072048187256, |
| "rewards/margins": 16.728424072265625, |
| "rewards/rejected": -19.708629608154297, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.5811965811965814, |
| "grad_norm": 0.041015625, |
| "learning_rate": 9.587420733441835e-06, |
| "logits/chosen": -1.227565050125122, |
| "logits/rejected": -0.5805907249450684, |
| "logps/chosen": -68.70391845703125, |
| "logps/rejected": -279.85845947265625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.8893535137176514, |
| "rewards/margins": 17.282146453857422, |
| "rewards/rejected": -20.171499252319336, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.6025641025641026, |
| "grad_norm": 0.000698089599609375, |
| "learning_rate": 9.45177448635447e-06, |
| "logits/chosen": -1.251037836074829, |
| "logits/rejected": -0.515083372592926, |
| "logps/chosen": -68.63763427734375, |
| "logps/rejected": -280.8656921386719, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.9085798263549805, |
| "rewards/margins": 17.550891876220703, |
| "rewards/rejected": -20.459474563598633, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.623931623931624, |
| "grad_norm": 0.00811767578125, |
| "learning_rate": 9.316210990276434e-06, |
| "logits/chosen": -1.174579381942749, |
| "logits/rejected": -0.5305734276771545, |
| "logps/chosen": -68.64695739746094, |
| "logps/rejected": -270.04510498046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.869532346725464, |
| "rewards/margins": 16.443248748779297, |
| "rewards/rejected": -19.31278419494629, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.6452991452991452, |
| "grad_norm": 0.002349853515625, |
| "learning_rate": 9.18080542667105e-06, |
| "logits/chosen": -1.2183802127838135, |
| "logits/rejected": -0.5133947134017944, |
| "logps/chosen": -69.98764038085938, |
| "logps/rejected": -290.1512756347656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.9592034816741943, |
| "rewards/margins": 18.04782485961914, |
| "rewards/rejected": -21.007028579711914, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.01348876953125, |
| "learning_rate": 9.045632889414686e-06, |
| "logits/chosen": -1.2081528902053833, |
| "logits/rejected": -0.5276485681533813, |
| "logps/chosen": -68.10844421386719, |
| "logps/rejected": -281.4985046386719, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.8550543785095215, |
| "rewards/margins": 17.489383697509766, |
| "rewards/rejected": -20.344438552856445, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.688034188034188, |
| "grad_norm": 0.00848388671875, |
| "learning_rate": 8.910768343150828e-06, |
| "logits/chosen": -1.1923013925552368, |
| "logits/rejected": -0.5268815755844116, |
| "logps/chosen": -69.50736999511719, |
| "logps/rejected": -282.33880615234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.0535500049591064, |
| "rewards/margins": 17.47011375427246, |
| "rewards/rejected": -20.523662567138672, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.7094017094017095, |
| "grad_norm": 0.001434326171875, |
| "learning_rate": 8.77628658171581e-06, |
| "logits/chosen": -1.2103922367095947, |
| "logits/rejected": -0.5478588342666626, |
| "logps/chosen": -69.14801025390625, |
| "logps/rejected": -277.53424072265625, |
| "loss": 0.006, |
| "rewards/accuracies": 0.9958332777023315, |
| "rewards/chosen": -2.929593563079834, |
| "rewards/margins": 16.97675895690918, |
| "rewards/rejected": -19.906352996826172, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7307692307692308, |
| "grad_norm": 0.091796875, |
| "learning_rate": 8.642262186659298e-06, |
| "logits/chosen": -1.1698075532913208, |
| "logits/rejected": -0.520237147808075, |
| "logps/chosen": -69.62812805175781, |
| "logps/rejected": -279.01776123046875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.9473519325256348, |
| "rewards/margins": 17.070568084716797, |
| "rewards/rejected": -20.01791763305664, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.7521367521367521, |
| "grad_norm": 0.004058837890625, |
| "learning_rate": 8.508769485882487e-06, |
| "logits/chosen": -1.2100353240966797, |
| "logits/rejected": -0.566001296043396, |
| "logps/chosen": -73.57793426513672, |
| "logps/rejected": -278.8910217285156, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.203927516937256, |
| "rewards/margins": 16.867618560791016, |
| "rewards/rejected": -20.07154655456543, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.7735042735042734, |
| "grad_norm": 0.78515625, |
| "learning_rate": 8.375882512416969e-06, |
| "logits/chosen": -1.1643860340118408, |
| "logits/rejected": -0.4784732758998871, |
| "logps/chosen": -72.14900207519531, |
| "logps/rejected": -282.3783264160156, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.1646692752838135, |
| "rewards/margins": 17.262126922607422, |
| "rewards/rejected": -20.426794052124023, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.7948717948717947, |
| "grad_norm": 0.0245361328125, |
| "learning_rate": 8.243674963367137e-06, |
| "logits/chosen": -1.170971393585205, |
| "logits/rejected": -0.4777548909187317, |
| "logps/chosen": -74.64592742919922, |
| "logps/rejected": -281.1402587890625, |
| "loss": 0.0031, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -3.446406602859497, |
| "rewards/margins": 16.716421127319336, |
| "rewards/rejected": -20.162830352783203, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.8162393162393162, |
| "grad_norm": 0.00127410888671875, |
| "learning_rate": 8.11222015903888e-06, |
| "logits/chosen": -1.1845993995666504, |
| "logits/rejected": -0.47238603234291077, |
| "logps/chosen": -75.17137145996094, |
| "logps/rejected": -286.2430725097656, |
| "loss": 0.0046, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -3.5714709758758545, |
| "rewards/margins": 17.298736572265625, |
| "rewards/rejected": -20.87020492553711, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.8376068376068377, |
| "grad_norm": 0.0174560546875, |
| "learning_rate": 7.981591002277265e-06, |
| "logits/chosen": -1.1712948083877563, |
| "logits/rejected": -0.4820891320705414, |
| "logps/chosen": -74.11454010009766, |
| "logps/rejected": -281.625732421875, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.407923460006714, |
| "rewards/margins": 17.11835479736328, |
| "rewards/rejected": -20.526281356811523, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.858974358974359, |
| "grad_norm": 0.01104736328125, |
| "learning_rate": 7.851859938035712e-06, |
| "logits/chosen": -1.149280309677124, |
| "logits/rejected": -0.46685323119163513, |
| "logps/chosen": -75.06929016113281, |
| "logps/rejected": -291.274169921875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.454303026199341, |
| "rewards/margins": 17.817852020263672, |
| "rewards/rejected": -21.272151947021484, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.8803418803418803, |
| "grad_norm": 0.0277099609375, |
| "learning_rate": 7.723098913199118e-06, |
| "logits/chosen": -1.1872258186340332, |
| "logits/rejected": -0.4977366328239441, |
| "logps/chosen": -72.54876708984375, |
| "logps/rejected": -279.2344665527344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.3534111976623535, |
| "rewards/margins": 17.049516677856445, |
| "rewards/rejected": -20.402929306030273, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.9017094017094016, |
| "grad_norm": 0.01104736328125, |
| "learning_rate": 7.595379336683204e-06, |
| "logits/chosen": -1.1503307819366455, |
| "logits/rejected": -0.40190115571022034, |
| "logps/chosen": -70.95316314697266, |
| "logps/rejected": -287.2573547363281, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.2571163177490234, |
| "rewards/margins": 17.867298126220703, |
| "rewards/rejected": -21.124412536621094, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 0.00060272216796875, |
| "learning_rate": 7.468772039832218e-06, |
| "logits/chosen": -1.1172707080841064, |
| "logits/rejected": -0.3832516074180603, |
| "logps/chosen": -66.7195816040039, |
| "logps/rejected": -281.6941833496094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.839646816253662, |
| "rewards/margins": 17.725854873657227, |
| "rewards/rejected": -20.56549835205078, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.9444444444444444, |
| "grad_norm": 0.0028839111328125, |
| "learning_rate": 7.3433472371369404e-06, |
| "logits/chosen": -1.1930923461914062, |
| "logits/rejected": -0.48473644256591797, |
| "logps/chosen": -72.68934631347656, |
| "logps/rejected": -284.6398010253906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.318150281906128, |
| "rewards/margins": 17.357219696044922, |
| "rewards/rejected": -20.675371170043945, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.965811965811966, |
| "grad_norm": 0.000492095947265625, |
| "learning_rate": 7.219174487294784e-06, |
| "logits/chosen": -1.1722862720489502, |
| "logits/rejected": -0.47326725721359253, |
| "logps/chosen": -70.49462890625, |
| "logps/rejected": -287.0985107421875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.0771074295043945, |
| "rewards/margins": 17.8206844329834, |
| "rewards/rejected": -20.89779281616211, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.9871794871794872, |
| "grad_norm": 0.00872802734375, |
| "learning_rate": 7.0963226546336e-06, |
| "logits/chosen": -1.20412278175354, |
| "logits/rejected": -0.5105618238449097, |
| "logps/chosen": -70.91376495361328, |
| "logps/rejected": -280.29852294921875, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.1506786346435547, |
| "rewards/margins": 17.16250228881836, |
| "rewards/rejected": -20.313182830810547, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.9914529914529915, |
| "eval_logits/chosen": -1.2244133949279785, |
| "eval_logits/rejected": -0.5135009288787842, |
| "eval_logps/chosen": -72.20535278320312, |
| "eval_logps/rejected": -285.3635559082031, |
| "eval_loss": 4.7980323870433494e-05, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -3.276163101196289, |
| "eval_rewards/margins": 17.45850372314453, |
| "eval_rewards/rejected": -20.734668731689453, |
| "eval_runtime": 9.4345, |
| "eval_samples_per_second": 21.199, |
| "eval_steps_per_second": 21.199, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.0085470085470085, |
| "grad_norm": 0.024169921875, |
| "learning_rate": 6.974859870920561e-06, |
| "logits/chosen": -1.1253283023834229, |
| "logits/rejected": -0.4625110626220703, |
| "logps/chosen": -72.798095703125, |
| "logps/rejected": -279.7697448730469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.3227851390838623, |
| "rewards/margins": 17.012950897216797, |
| "rewards/rejected": -20.335737228393555, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.02991452991453, |
| "grad_norm": 0.00131988525390625, |
| "learning_rate": 6.8548534975773135e-06, |
| "logits/chosen": -1.179386854171753, |
| "logits/rejected": -0.49411916732788086, |
| "logps/chosen": -73.65827178955078, |
| "logps/rejected": -285.2862243652344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.295319080352783, |
| "rewards/margins": 17.375682830810547, |
| "rewards/rejected": -20.671003341674805, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.051282051282051, |
| "grad_norm": 0.189453125, |
| "learning_rate": 6.736370088322359e-06, |
| "logits/chosen": -1.1767910718917847, |
| "logits/rejected": -0.4558785557746887, |
| "logps/chosen": -72.45469665527344, |
| "logps/rejected": -282.46148681640625, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.309537410736084, |
| "rewards/margins": 17.340744018554688, |
| "rewards/rejected": -20.65028190612793, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.072649572649573, |
| "grad_norm": 0.016357421875, |
| "learning_rate": 6.619475352261356e-06, |
| "logits/chosen": -1.1392714977264404, |
| "logits/rejected": -0.45718201994895935, |
| "logps/chosen": -76.55453491210938, |
| "logps/rejected": -285.7558898925781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.4542293548583984, |
| "rewards/margins": 17.28127670288086, |
| "rewards/rejected": -20.73550796508789, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.094017094017094, |
| "grad_norm": 0.0069580078125, |
| "learning_rate": 6.504234117445857e-06, |
| "logits/chosen": -1.1605439186096191, |
| "logits/rejected": -0.4634723663330078, |
| "logps/chosen": -72.77029418945312, |
| "logps/rejected": -282.7125549316406, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.3726367950439453, |
| "rewards/margins": 17.24945068359375, |
| "rewards/rejected": -20.622087478637695, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.1153846153846154, |
| "grad_norm": 0.007354736328125, |
| "learning_rate": 6.39071029492065e-06, |
| "logits/chosen": -1.1251627206802368, |
| "logits/rejected": -0.4109951853752136, |
| "logps/chosen": -72.30722045898438, |
| "logps/rejected": -281.81280517578125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.365689754486084, |
| "rewards/margins": 17.244686126708984, |
| "rewards/rejected": -20.610374450683594, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.1367521367521367, |
| "grad_norm": 0.13671875, |
| "learning_rate": 6.2789668432796535e-06, |
| "logits/chosen": -1.1276848316192627, |
| "logits/rejected": -0.4581735134124756, |
| "logps/chosen": -73.38197326660156, |
| "logps/rejected": -283.4115295410156, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.4196181297302246, |
| "rewards/margins": 17.356618881225586, |
| "rewards/rejected": -20.7762393951416, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.1367521367521367, |
| "eval_logits/chosen": -1.2213764190673828, |
| "eval_logits/rejected": -0.508858859539032, |
| "eval_logps/chosen": -72.31729125976562, |
| "eval_logps/rejected": -285.5926818847656, |
| "eval_loss": 4.8542991862632334e-05, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -3.2873549461364746, |
| "eval_rewards/margins": 17.470226287841797, |
| "eval_rewards/rejected": -20.757583618164062, |
| "eval_runtime": 9.4725, |
| "eval_samples_per_second": 21.114, |
| "eval_steps_per_second": 21.114, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.158119658119658, |
| "grad_norm": 0.006744384765625, |
| "learning_rate": 6.16906573375004e-06, |
| "logits/chosen": -1.1647155284881592, |
| "logits/rejected": -0.5106357336044312, |
| "logps/chosen": -72.9385986328125, |
| "logps/rejected": -281.58062744140625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.3363139629364014, |
| "rewards/margins": 17.115955352783203, |
| "rewards/rejected": -20.452272415161133, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.1794871794871793, |
| "grad_norm": 0.033203125, |
| "learning_rate": 6.061067915823923e-06, |
| "logits/chosen": -1.102561593055725, |
| "logits/rejected": -0.4104360044002533, |
| "logps/chosen": -70.00373077392578, |
| "logps/rejected": -283.4765319824219, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.082702159881592, |
| "rewards/margins": 17.501201629638672, |
| "rewards/rejected": -20.583904266357422, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.200854700854701, |
| "grad_norm": 0.04150390625, |
| "learning_rate": 5.955033283456711e-06, |
| "logits/chosen": -1.136200189590454, |
| "logits/rejected": -0.4127614498138428, |
| "logps/chosen": -77.00238037109375, |
| "logps/rejected": -293.89617919921875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.632338762283325, |
| "rewards/margins": 17.84152603149414, |
| "rewards/rejected": -21.473865509033203, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.00136566162109375, |
| "learning_rate": 5.8510206418507914e-06, |
| "logits/chosen": -1.2033765316009521, |
| "logits/rejected": -0.5239652395248413, |
| "logps/chosen": -74.33930969238281, |
| "logps/rejected": -299.2998352050781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.386591672897339, |
| "rewards/margins": 18.531057357788086, |
| "rewards/rejected": -21.91765022277832, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.2435897435897436, |
| "grad_norm": 0.003875732421875, |
| "learning_rate": 5.749087674843095e-06, |
| "logits/chosen": -1.139147400856018, |
| "logits/rejected": -0.46916326880455017, |
| "logps/chosen": -68.5783920288086, |
| "logps/rejected": -284.41552734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.0137269496917725, |
| "rewards/margins": 17.694652557373047, |
| "rewards/rejected": -20.708377838134766, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.264957264957265, |
| "grad_norm": 0.01904296875, |
| "learning_rate": 5.649290912914482e-06, |
| "logits/chosen": -1.1451623439788818, |
| "logits/rejected": -0.47735461592674255, |
| "logps/chosen": -77.09215545654297, |
| "logps/rejected": -298.22796630859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.572205066680908, |
| "rewards/margins": 18.168258666992188, |
| "rewards/rejected": -21.740463256835938, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.286324786324786, |
| "grad_norm": 0.036865234375, |
| "learning_rate": 5.5516857018388144e-06, |
| "logits/chosen": -1.187374472618103, |
| "logits/rejected": -0.5284038782119751, |
| "logps/chosen": -71.90570831298828, |
| "logps/rejected": -280.32940673828125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.206554412841797, |
| "rewards/margins": 17.105445861816406, |
| "rewards/rejected": -20.312000274658203, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "grad_norm": 0.00921630859375, |
| "learning_rate": 5.456326171989005e-06, |
| "logits/chosen": -1.15079927444458, |
| "logits/rejected": -0.4923780858516693, |
| "logps/chosen": -70.75343322753906, |
| "logps/rejected": -297.9629821777344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.109229564666748, |
| "rewards/margins": 18.49727439880371, |
| "rewards/rejected": -21.60650634765625, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.3290598290598292, |
| "grad_norm": 0.00830078125, |
| "learning_rate": 5.363265208317156e-06, |
| "logits/chosen": -1.1182730197906494, |
| "logits/rejected": -0.4831443727016449, |
| "logps/chosen": -71.95735931396484, |
| "logps/rejected": -281.10089111328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.26196551322937, |
| "rewards/margins": 17.25033950805664, |
| "rewards/rejected": -20.512304306030273, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.3504273504273505, |
| "grad_norm": 0.006072998046875, |
| "learning_rate": 5.272554421025347e-06, |
| "logits/chosen": -1.1618000268936157, |
| "logits/rejected": -0.45643243193626404, |
| "logps/chosen": -72.98902893066406, |
| "logps/rejected": -289.97998046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.336233139038086, |
| "rewards/margins": 17.833538055419922, |
| "rewards/rejected": -21.169771194458008, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.371794871794872, |
| "grad_norm": 0.008544921875, |
| "learning_rate": 5.184244116943411e-06, |
| "logits/chosen": -1.151729941368103, |
| "logits/rejected": -0.4680960774421692, |
| "logps/chosen": -72.05565643310547, |
| "logps/rejected": -286.6762390136719, |
| "loss": 0.003, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -3.189112901687622, |
| "rewards/margins": 17.703588485717773, |
| "rewards/rejected": -20.892702102661133, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.393162393162393, |
| "grad_norm": 0.00457763671875, |
| "learning_rate": 5.098383271629512e-06, |
| "logits/chosen": -1.169447660446167, |
| "logits/rejected": -0.4685635566711426, |
| "logps/chosen": -72.01811218261719, |
| "logps/rejected": -279.0551452636719, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.2659707069396973, |
| "rewards/margins": 17.044937133789062, |
| "rewards/rejected": -20.3109073638916, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.4145299145299144, |
| "grad_norm": 0.002227783203125, |
| "learning_rate": 5.015019502209056e-06, |
| "logits/chosen": -1.1631286144256592, |
| "logits/rejected": -0.46378326416015625, |
| "logps/chosen": -70.7663803100586, |
| "logps/rejected": -276.07537841796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.2191970348358154, |
| "rewards/margins": 16.797992706298828, |
| "rewards/rejected": -20.017189025878906, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.435897435897436, |
| "grad_norm": 0.000713348388671875, |
| "learning_rate": 4.934199040966955e-06, |
| "logits/chosen": -1.185304045677185, |
| "logits/rejected": -0.4863820970058441, |
| "logps/chosen": -71.80037689208984, |
| "logps/rejected": -280.21148681640625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.2629001140594482, |
| "rewards/margins": 17.172740936279297, |
| "rewards/rejected": -20.435644149780273, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.4572649572649574, |
| "grad_norm": 0.00372314453125, |
| "learning_rate": 4.855966709707881e-06, |
| "logits/chosen": -1.1501516103744507, |
| "logits/rejected": -0.5059275031089783, |
| "logps/chosen": -75.91123962402344, |
| "logps/rejected": -286.08929443359375, |
| "loss": 0.0029, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -3.5484020709991455, |
| "rewards/margins": 17.334753036499023, |
| "rewards/rejected": -20.88315773010254, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.4786324786324787, |
| "grad_norm": 0.01190185546875, |
| "learning_rate": 4.780365894898799e-06, |
| "logits/chosen": -1.1519519090652466, |
| "logits/rejected": -0.47328823804855347, |
| "logps/chosen": -73.0169906616211, |
| "logps/rejected": -286.8009948730469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.299699068069458, |
| "rewards/margins": 17.556556701660156, |
| "rewards/rejected": -20.856258392333984, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.007354736328125, |
| "learning_rate": 4.7074385236074684e-06, |
| "logits/chosen": -1.1783647537231445, |
| "logits/rejected": -0.45893925428390503, |
| "logps/chosen": -77.3306655883789, |
| "logps/rejected": -294.39862060546875, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.5838446617126465, |
| "rewards/margins": 17.903881072998047, |
| "rewards/rejected": -21.487728118896484, |
| "step": 585 |
| }, |
| { |
| "epoch": 2.5213675213675213, |
| "grad_norm": 0.0615234375, |
| "learning_rate": 4.63722504025034e-06, |
| "logits/chosen": -1.1400740146636963, |
| "logits/rejected": -0.455253541469574, |
| "logps/chosen": -70.24212646484375, |
| "logps/rejected": -285.1997985839844, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.0748324394226074, |
| "rewards/margins": 17.681386947631836, |
| "rewards/rejected": -20.7562198638916, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.5427350427350426, |
| "grad_norm": 0.00384521484375, |
| "learning_rate": 4.569764384162676e-06, |
| "logits/chosen": -1.1541723012924194, |
| "logits/rejected": -0.43652376532554626, |
| "logps/chosen": -65.93037414550781, |
| "logps/rejected": -284.7110290527344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.755821943283081, |
| "rewards/margins": 17.971193313598633, |
| "rewards/rejected": -20.727014541625977, |
| "step": 595 |
| }, |
| { |
| "epoch": 2.564102564102564, |
| "grad_norm": 0.0198974609375, |
| "learning_rate": 4.50509396800341e-06, |
| "logits/chosen": -1.0580496788024902, |
| "logits/rejected": -0.334017276763916, |
| "logps/chosen": -72.97813415527344, |
| "logps/rejected": -287.36724853515625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.329857587814331, |
| "rewards/margins": 17.7404727935791, |
| "rewards/rejected": -21.070331573486328, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.5854700854700856, |
| "grad_norm": 0.02490234375, |
| "learning_rate": 4.443249657006627e-06, |
| "logits/chosen": -1.0786378383636475, |
| "logits/rejected": -0.3501695990562439, |
| "logps/chosen": -70.572509765625, |
| "logps/rejected": -291.306396484375, |
| "loss": 0.0029, |
| "rewards/accuracies": 0.9958333969116211, |
| "rewards/chosen": -3.165311813354492, |
| "rewards/margins": 18.223491668701172, |
| "rewards/rejected": -21.388805389404297, |
| "step": 605 |
| }, |
| { |
| "epoch": 2.606837606837607, |
| "grad_norm": 0.00946044921875, |
| "learning_rate": 4.384265749091266e-06, |
| "logits/chosen": -1.050445318222046, |
| "logits/rejected": -0.34452471137046814, |
| "logps/chosen": -78.69644927978516, |
| "logps/rejected": -289.40362548828125, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.7829060554504395, |
| "rewards/margins": 17.385984420776367, |
| "rewards/rejected": -21.16888999938965, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.628205128205128, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 4.328174955840002e-06, |
| "logits/chosen": -1.0618460178375244, |
| "logits/rejected": -0.329507052898407, |
| "logps/chosen": -67.98468017578125, |
| "logps/rejected": -282.68719482421875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.0549356937408447, |
| "rewards/margins": 17.611309051513672, |
| "rewards/rejected": -20.666244506835938, |
| "step": 615 |
| }, |
| { |
| "epoch": 2.6495726495726495, |
| "grad_norm": 0.006683349609375, |
| "learning_rate": 4.275008384357902e-06, |
| "logits/chosen": -1.0847241878509521, |
| "logits/rejected": -0.37132930755615234, |
| "logps/chosen": -72.32841491699219, |
| "logps/rejected": -284.59033203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.2704505920410156, |
| "rewards/margins": 17.539974212646484, |
| "rewards/rejected": -20.810426712036133, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.6709401709401708, |
| "grad_norm": 0.0023651123046875, |
| "learning_rate": 4.224795520020898e-06, |
| "logits/chosen": -1.0495777130126953, |
| "logits/rejected": -0.3218225836753845, |
| "logps/chosen": -75.63634490966797, |
| "logps/rejected": -283.78515625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.4399490356445312, |
| "rewards/margins": 17.156658172607422, |
| "rewards/rejected": -20.596609115600586, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.6923076923076925, |
| "grad_norm": 0.00848388671875, |
| "learning_rate": 4.177564210123634e-06, |
| "logits/chosen": -1.0822218656539917, |
| "logits/rejected": -0.37178927659988403, |
| "logps/chosen": -71.94633483886719, |
| "logps/rejected": -292.45965576171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.2282912731170654, |
| "rewards/margins": 18.118127822875977, |
| "rewards/rejected": -21.346416473388672, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.713675213675214, |
| "grad_norm": 0.0595703125, |
| "learning_rate": 4.133340648435789e-06, |
| "logits/chosen": -1.0735520124435425, |
| "logits/rejected": -0.33007147908210754, |
| "logps/chosen": -74.36048889160156, |
| "logps/rejected": -289.21795654296875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.496561050415039, |
| "rewards/margins": 17.668859481811523, |
| "rewards/rejected": -21.16541862487793, |
| "step": 635 |
| }, |
| { |
| "epoch": 2.735042735042735, |
| "grad_norm": 0.0281982421875, |
| "learning_rate": 4.092149360675402e-06, |
| "logits/chosen": -1.0645346641540527, |
| "logits/rejected": -0.37463390827178955, |
| "logps/chosen": -78.34007263183594, |
| "logps/rejected": -292.8553771972656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.6229121685028076, |
| "rewards/margins": 17.589675903320312, |
| "rewards/rejected": -21.212587356567383, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.7564102564102564, |
| "grad_norm": 0.034912109375, |
| "learning_rate": 4.054013190907282e-06, |
| "logits/chosen": -1.039671540260315, |
| "logits/rejected": -0.3333393931388855, |
| "logps/chosen": -69.4139404296875, |
| "logps/rejected": -283.67291259765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.0979743003845215, |
| "rewards/margins": 17.693653106689453, |
| "rewards/rejected": -20.7916259765625, |
| "step": 645 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 0.0034332275390625, |
| "learning_rate": 4.018953288874035e-06, |
| "logits/chosen": -1.0656228065490723, |
| "logits/rejected": -0.3747131824493408, |
| "logps/chosen": -73.5749282836914, |
| "logps/rejected": -289.0091247558594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.367324113845825, |
| "rewards/margins": 17.781917572021484, |
| "rewards/rejected": -21.149242401123047, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.799145299145299, |
| "grad_norm": 0.00098419189453125, |
| "learning_rate": 3.9869890982667385e-06, |
| "logits/chosen": -1.0841352939605713, |
| "logits/rejected": -0.30320778489112854, |
| "logps/chosen": -71.91044616699219, |
| "logps/rejected": -290.2735595703125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.256523609161377, |
| "rewards/margins": 17.801498413085938, |
| "rewards/rejected": -21.058025360107422, |
| "step": 655 |
| }, |
| { |
| "epoch": 2.8205128205128203, |
| "grad_norm": 0.00970458984375, |
| "learning_rate": 3.9581383459417625e-06, |
| "logits/chosen": -1.052257776260376, |
| "logits/rejected": -0.34810546040534973, |
| "logps/chosen": -78.00955963134766, |
| "logps/rejected": -298.2610168457031, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.769831418991089, |
| "rewards/margins": 18.104028701782227, |
| "rewards/rejected": -21.873859405517578, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.841880341880342, |
| "grad_norm": 0.004974365234375, |
| "learning_rate": 3.932417032089722e-06, |
| "logits/chosen": -1.109933614730835, |
| "logits/rejected": -0.38616496324539185, |
| "logps/chosen": -75.1268081665039, |
| "logps/rejected": -294.04522705078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.484327793121338, |
| "rewards/margins": 18.015382766723633, |
| "rewards/rejected": -21.499710083007812, |
| "step": 665 |
| }, |
| { |
| "epoch": 2.8632478632478633, |
| "grad_norm": 0.00125885009765625, |
| "learning_rate": 3.909839421362017e-06, |
| "logits/chosen": -1.0365312099456787, |
| "logits/rejected": -0.3303161561489105, |
| "logps/chosen": -75.15225219726562, |
| "logps/rejected": -289.403076171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.5623269081115723, |
| "rewards/margins": 17.46319007873535, |
| "rewards/rejected": -21.025516510009766, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.8846153846153846, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 3.890418034959871e-06, |
| "logits/chosen": -1.0185749530792236, |
| "logits/rejected": -0.24934515357017517, |
| "logps/chosen": -73.84525299072266, |
| "logps/rejected": -287.80670166015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.451679229736328, |
| "rewards/margins": 17.665843963623047, |
| "rewards/rejected": -21.117523193359375, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.905982905982906, |
| "grad_norm": 0.05517578125, |
| "learning_rate": 3.874163643690263e-06, |
| "logits/chosen": -1.0194957256317139, |
| "logits/rejected": -0.2993074655532837, |
| "logps/chosen": -80.23220825195312, |
| "logps/rejected": -291.32550048828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.9078011512756348, |
| "rewards/margins": 17.403505325317383, |
| "rewards/rejected": -21.31130599975586, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.9273504273504276, |
| "grad_norm": 0.01055908203125, |
| "learning_rate": 3.861085261992599e-06, |
| "logits/chosen": -1.0856374502182007, |
| "logits/rejected": -0.4421593248844147, |
| "logps/chosen": -77.59326171875, |
| "logps/rejected": -291.57843017578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.7359116077423096, |
| "rewards/margins": 17.531463623046875, |
| "rewards/rejected": -21.267375946044922, |
| "step": 685 |
| }, |
| { |
| "epoch": 2.948717948717949, |
| "grad_norm": 0.068359375, |
| "learning_rate": 3.851190142939442e-06, |
| "logits/chosen": -1.0751783847808838, |
| "logits/rejected": -0.3822060823440552, |
| "logps/chosen": -71.5597915649414, |
| "logps/rejected": -287.9521179199219, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.1508712768554688, |
| "rewards/margins": 17.788990020751953, |
| "rewards/rejected": -20.939861297607422, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.97008547008547, |
| "grad_norm": 0.005615234375, |
| "learning_rate": 3.844483774214069e-06, |
| "logits/chosen": -1.044440507888794, |
| "logits/rejected": -0.2854236960411072, |
| "logps/chosen": -70.91199493408203, |
| "logps/rejected": -288.72283935546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.184840679168701, |
| "rewards/margins": 17.9717960357666, |
| "rewards/rejected": -21.156635284423828, |
| "step": 695 |
| }, |
| { |
| "epoch": 2.9871794871794872, |
| "eval_logits/chosen": -1.1473212242126465, |
| "eval_logits/rejected": -0.4121095538139343, |
| "eval_logps/chosen": -73.0550308227539, |
| "eval_logps/rejected": -287.1957092285156, |
| "eval_loss": 4.4729193177772686e-05, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -3.3611295223236084, |
| "eval_rewards/margins": 17.5567569732666, |
| "eval_rewards/rejected": -20.917884826660156, |
| "eval_runtime": 9.4413, |
| "eval_samples_per_second": 21.183, |
| "eval_steps_per_second": 21.183, |
| "step": 699 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 702, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 12, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|