| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.6910452058738843, | |
| "eval_steps": 500, | |
| "global_step": 1200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005758710048949035, | |
| "grad_norm": 19.853458404541016, | |
| "learning_rate": 5.172413793103448e-08, | |
| "logits/chosen": -0.562769889831543, | |
| "logits/rejected": -0.5616950988769531, | |
| "logps/chosen": -127.0331802368164, | |
| "logps/rejected": -83.81842041015625, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.4124999940395355, | |
| "rewards/chosen": -0.00019152756431140006, | |
| "rewards/margins": 0.000841214437969029, | |
| "rewards/rejected": -0.0010327422060072422, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01151742009789807, | |
| "grad_norm": 19.53203582763672, | |
| "learning_rate": 1.0919540229885057e-07, | |
| "logits/chosen": -0.6015105247497559, | |
| "logits/rejected": -0.6022548079490662, | |
| "logps/chosen": -143.99452209472656, | |
| "logps/rejected": -89.27932739257812, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": 0.00048787007108330727, | |
| "rewards/margins": 0.001671066740527749, | |
| "rewards/rejected": -0.0011831964366137981, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.017276130146847105, | |
| "grad_norm": 22.230405807495117, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "logits/chosen": -0.582757830619812, | |
| "logits/rejected": -0.5794022679328918, | |
| "logps/chosen": -128.6601104736328, | |
| "logps/rejected": -76.13624572753906, | |
| "loss": 0.6944, | |
| "rewards/accuracies": 0.4812500476837158, | |
| "rewards/chosen": -0.0016581722302362323, | |
| "rewards/margins": -0.0020952033810317516, | |
| "rewards/rejected": 0.0004370305105112493, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02303484019579614, | |
| "grad_norm": 19.54130744934082, | |
| "learning_rate": 2.2413793103448274e-07, | |
| "logits/chosen": -0.5358110666275024, | |
| "logits/rejected": -0.5239226222038269, | |
| "logps/chosen": -148.56979370117188, | |
| "logps/rejected": -91.35775756835938, | |
| "loss": 0.6916, | |
| "rewards/accuracies": 0.609375, | |
| "rewards/chosen": 0.0023662634193897247, | |
| "rewards/margins": 0.0035909225698560476, | |
| "rewards/rejected": -0.0012246592668816447, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.028793550244745177, | |
| "grad_norm": 29.174854278564453, | |
| "learning_rate": 2.816091954022988e-07, | |
| "logits/chosen": -0.6022522449493408, | |
| "logits/rejected": -0.6001952886581421, | |
| "logps/chosen": -165.06558227539062, | |
| "logps/rejected": -83.33187866210938, | |
| "loss": 0.6882, | |
| "rewards/accuracies": 0.6281249523162842, | |
| "rewards/chosen": 0.004078409168869257, | |
| "rewards/margins": 0.011784590780735016, | |
| "rewards/rejected": -0.007706179283559322, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03455226029369421, | |
| "grad_norm": 22.835206985473633, | |
| "learning_rate": 3.390804597701149e-07, | |
| "logits/chosen": -0.5758532881736755, | |
| "logits/rejected": -0.5722157955169678, | |
| "logps/chosen": -136.6300811767578, | |
| "logps/rejected": -82.68758392333984, | |
| "loss": 0.6806, | |
| "rewards/accuracies": 0.6656249761581421, | |
| "rewards/chosen": 0.014781510457396507, | |
| "rewards/margins": 0.026332611218094826, | |
| "rewards/rejected": -0.01155109889805317, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04031097034264325, | |
| "grad_norm": 18.888277053833008, | |
| "learning_rate": 3.9655172413793105e-07, | |
| "logits/chosen": -0.584574282169342, | |
| "logits/rejected": -0.5734848380088806, | |
| "logps/chosen": -112.57417297363281, | |
| "logps/rejected": -81.75566864013672, | |
| "loss": 0.673, | |
| "rewards/accuracies": 0.659375011920929, | |
| "rewards/chosen": 0.026554793119430542, | |
| "rewards/margins": 0.04325523227453232, | |
| "rewards/rejected": -0.016700439155101776, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04606968039159228, | |
| "grad_norm": 18.912086486816406, | |
| "learning_rate": 4.540229885057471e-07, | |
| "logits/chosen": -0.5466264486312866, | |
| "logits/rejected": -0.5328729152679443, | |
| "logps/chosen": -150.10618591308594, | |
| "logps/rejected": -81.80156707763672, | |
| "loss": 0.6557, | |
| "rewards/accuracies": 0.7187500596046448, | |
| "rewards/chosen": 0.05700210481882095, | |
| "rewards/margins": 0.08444017171859741, | |
| "rewards/rejected": -0.02743806689977646, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05182839044054132, | |
| "grad_norm": 20.304170608520508, | |
| "learning_rate": 5.114942528735632e-07, | |
| "logits/chosen": -0.5730519890785217, | |
| "logits/rejected": -0.5601622462272644, | |
| "logps/chosen": -135.96995544433594, | |
| "logps/rejected": -83.88729858398438, | |
| "loss": 0.6352, | |
| "rewards/accuracies": 0.7312500476837158, | |
| "rewards/chosen": 0.07092135399580002, | |
| "rewards/margins": 0.13441398739814758, | |
| "rewards/rejected": -0.06349264085292816, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05758710048949035, | |
| "grad_norm": 19.214027404785156, | |
| "learning_rate": 5.689655172413793e-07, | |
| "logits/chosen": -0.5703257322311401, | |
| "logits/rejected": -0.5486276745796204, | |
| "logps/chosen": -132.86158752441406, | |
| "logps/rejected": -84.33039093017578, | |
| "loss": 0.6198, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": 0.0864553153514862, | |
| "rewards/margins": 0.18071594834327698, | |
| "rewards/rejected": -0.09426065534353256, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06334581053843939, | |
| "grad_norm": 18.2548885345459, | |
| "learning_rate": 6.264367816091954e-07, | |
| "logits/chosen": -0.5512282848358154, | |
| "logits/rejected": -0.5419508814811707, | |
| "logps/chosen": -128.60459899902344, | |
| "logps/rejected": -88.85281372070312, | |
| "loss": 0.6071, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.08900328725576401, | |
| "rewards/margins": 0.23027661442756653, | |
| "rewards/rejected": -0.1412733495235443, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06910452058738842, | |
| "grad_norm": 16.20355987548828, | |
| "learning_rate": 6.839080459770114e-07, | |
| "logits/chosen": -0.5748304724693298, | |
| "logits/rejected": -0.5648065805435181, | |
| "logps/chosen": -131.20220947265625, | |
| "logps/rejected": -83.58260345458984, | |
| "loss": 0.588, | |
| "rewards/accuracies": 0.7406250238418579, | |
| "rewards/chosen": 0.1262008100748062, | |
| "rewards/margins": 0.2918751537799835, | |
| "rewards/rejected": -0.1656743437051773, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07486323063633746, | |
| "grad_norm": 14.335201263427734, | |
| "learning_rate": 7.413793103448276e-07, | |
| "logits/chosen": -0.5980964303016663, | |
| "logits/rejected": -0.5801858901977539, | |
| "logps/chosen": -129.68930053710938, | |
| "logps/rejected": -77.69696807861328, | |
| "loss": 0.562, | |
| "rewards/accuracies": 0.7406250238418579, | |
| "rewards/chosen": 0.13151288032531738, | |
| "rewards/margins": 0.3828701972961426, | |
| "rewards/rejected": -0.2513573467731476, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0806219406852865, | |
| "grad_norm": 17.238740921020508, | |
| "learning_rate": 7.988505747126436e-07, | |
| "logits/chosen": -0.5309160351753235, | |
| "logits/rejected": -0.5147740840911865, | |
| "logps/chosen": -136.6770477294922, | |
| "logps/rejected": -84.51583862304688, | |
| "loss": 0.5519, | |
| "rewards/accuracies": 0.753125011920929, | |
| "rewards/chosen": 0.19663727283477783, | |
| "rewards/margins": 0.4776872396469116, | |
| "rewards/rejected": -0.2810499668121338, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.08638065073423554, | |
| "grad_norm": 15.70281982421875, | |
| "learning_rate": 8.563218390804597e-07, | |
| "logits/chosen": -0.5557988286018372, | |
| "logits/rejected": -0.540660560131073, | |
| "logps/chosen": -144.00198364257812, | |
| "logps/rejected": -100.53820037841797, | |
| "loss": 0.5486, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 0.23025350272655487, | |
| "rewards/margins": 0.5131940245628357, | |
| "rewards/rejected": -0.28294044733047485, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.09213936078318456, | |
| "grad_norm": 17.085052490234375, | |
| "learning_rate": 9.137931034482759e-07, | |
| "logits/chosen": -0.5505703687667847, | |
| "logits/rejected": -0.5197522640228271, | |
| "logps/chosen": -136.61239624023438, | |
| "logps/rejected": -96.98216247558594, | |
| "loss": 0.5496, | |
| "rewards/accuracies": 0.7375000715255737, | |
| "rewards/chosen": 0.1979924887418747, | |
| "rewards/margins": 0.5241073966026306, | |
| "rewards/rejected": -0.3261149227619171, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0978980708321336, | |
| "grad_norm": 16.59015464782715, | |
| "learning_rate": 9.712643678160918e-07, | |
| "logits/chosen": -0.5620574355125427, | |
| "logits/rejected": -0.5433012247085571, | |
| "logps/chosen": -130.6116943359375, | |
| "logps/rejected": -88.13215637207031, | |
| "loss": 0.5364, | |
| "rewards/accuracies": 0.7500000596046448, | |
| "rewards/chosen": 0.16394342482089996, | |
| "rewards/margins": 0.577468752861023, | |
| "rewards/rejected": -0.4135252833366394, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.10365678088108264, | |
| "grad_norm": 14.182692527770996, | |
| "learning_rate": 9.99974750187855e-07, | |
| "logits/chosen": -0.6073015332221985, | |
| "logits/rejected": -0.5830385088920593, | |
| "logps/chosen": -134.97865295410156, | |
| "logps/rejected": -82.4712905883789, | |
| "loss": 0.5313, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": 0.26480597257614136, | |
| "rewards/margins": 0.6398300528526306, | |
| "rewards/rejected": -0.37502405047416687, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.10941549093003167, | |
| "grad_norm": 15.201092720031738, | |
| "learning_rate": 9.997727669917109e-07, | |
| "logits/chosen": -0.5887178182601929, | |
| "logits/rejected": -0.5629587769508362, | |
| "logps/chosen": -134.81166076660156, | |
| "logps/rejected": -91.30838775634766, | |
| "loss": 0.5264, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": 0.15517549216747284, | |
| "rewards/margins": 0.6431481242179871, | |
| "rewards/rejected": -0.4879727065563202, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1151742009789807, | |
| "grad_norm": 13.136405944824219, | |
| "learning_rate": 9.993688821979663e-07, | |
| "logits/chosen": -0.5851372480392456, | |
| "logits/rejected": -0.552108645439148, | |
| "logps/chosen": -143.36703491210938, | |
| "logps/rejected": -102.49083709716797, | |
| "loss": 0.4568, | |
| "rewards/accuracies": 0.8062500357627869, | |
| "rewards/chosen": 0.3289705216884613, | |
| "rewards/margins": 0.8849547505378723, | |
| "rewards/rejected": -0.5559841394424438, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.12093291102792975, | |
| "grad_norm": 26.78643035888672, | |
| "learning_rate": 9.98763258970744e-07, | |
| "logits/chosen": -0.6051122546195984, | |
| "logits/rejected": -0.5708233714103699, | |
| "logps/chosen": -152.91024780273438, | |
| "logps/rejected": -95.11014556884766, | |
| "loss": 0.4741, | |
| "rewards/accuracies": 0.7718750238418579, | |
| "rewards/chosen": 0.2776795029640198, | |
| "rewards/margins": 0.8509930372238159, | |
| "rewards/rejected": -0.5733135342597961, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.12669162107687879, | |
| "grad_norm": 24.18678855895996, | |
| "learning_rate": 9.979561419738296e-07, | |
| "logits/chosen": -0.6134639978408813, | |
| "logits/rejected": -0.5940448045730591, | |
| "logps/chosen": -132.16368103027344, | |
| "logps/rejected": -93.20537567138672, | |
| "loss": 0.5273, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": 0.22999629378318787, | |
| "rewards/margins": 0.7246684432029724, | |
| "rewards/rejected": -0.49467211961746216, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.13245033112582782, | |
| "grad_norm": 12.556412696838379, | |
| "learning_rate": 9.969478572718307e-07, | |
| "logits/chosen": -0.6136160492897034, | |
| "logits/rejected": -0.5797239542007446, | |
| "logps/chosen": -124.40892028808594, | |
| "logps/rejected": -90.66249084472656, | |
| "loss": 0.4829, | |
| "rewards/accuracies": 0.778124988079071, | |
| "rewards/chosen": 0.29396748542785645, | |
| "rewards/margins": 0.8314955830574036, | |
| "rewards/rejected": -0.5375280976295471, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.13820904117477684, | |
| "grad_norm": 17.433090209960938, | |
| "learning_rate": 9.95738812198451e-07, | |
| "logits/chosen": -0.6312894821166992, | |
| "logits/rejected": -0.6113855838775635, | |
| "logps/chosen": -122.38996124267578, | |
| "logps/rejected": -82.93913269042969, | |
| "loss": 0.5194, | |
| "rewards/accuracies": 0.7437500357627869, | |
| "rewards/chosen": 0.19915173947811127, | |
| "rewards/margins": 0.7839306592941284, | |
| "rewards/rejected": -0.5847789645195007, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.14396775122372588, | |
| "grad_norm": 16.78449058532715, | |
| "learning_rate": 9.943294951919325e-07, | |
| "logits/chosen": -0.6331308484077454, | |
| "logits/rejected": -0.6054038405418396, | |
| "logps/chosen": -131.5919647216797, | |
| "logps/rejected": -96.62232971191406, | |
| "loss": 0.4465, | |
| "rewards/accuracies": 0.7999999523162842, | |
| "rewards/chosen": 0.3666311800479889, | |
| "rewards/margins": 0.9798641204833984, | |
| "rewards/rejected": -0.6132329702377319, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.14972646127267492, | |
| "grad_norm": 18.236032485961914, | |
| "learning_rate": 9.92720475597734e-07, | |
| "logits/chosen": -0.6338821649551392, | |
| "logits/rejected": -0.6092484593391418, | |
| "logps/chosen": -120.25117492675781, | |
| "logps/rejected": -93.84246063232422, | |
| "loss": 0.4875, | |
| "rewards/accuracies": 0.793749988079071, | |
| "rewards/chosen": 0.22484159469604492, | |
| "rewards/margins": 0.8752425312995911, | |
| "rewards/rejected": -0.6504008769989014, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.15548517132162396, | |
| "grad_norm": 11.804591178894043, | |
| "learning_rate": 9.909124034385224e-07, | |
| "logits/chosen": -0.6439257860183716, | |
| "logits/rejected": -0.6129547357559204, | |
| "logps/chosen": -129.7813262939453, | |
| "logps/rejected": -83.8338623046875, | |
| "loss": 0.4296, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 0.3556497097015381, | |
| "rewards/margins": 1.0146920680999756, | |
| "rewards/rejected": -0.6590422987937927, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.161243881370573, | |
| "grad_norm": 13.709634780883789, | |
| "learning_rate": 9.889060091515707e-07, | |
| "logits/chosen": -0.6321221590042114, | |
| "logits/rejected": -0.6142387986183167, | |
| "logps/chosen": -123.8255844116211, | |
| "logps/rejected": -100.52135467529297, | |
| "loss": 0.4754, | |
| "rewards/accuracies": 0.746874988079071, | |
| "rewards/chosen": 0.21102119982242584, | |
| "rewards/margins": 0.9996404647827148, | |
| "rewards/rejected": -0.7886192798614502, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.16700259141952203, | |
| "grad_norm": 16.686607360839844, | |
| "learning_rate": 9.86702103293674e-07, | |
| "logits/chosen": -0.6492558717727661, | |
| "logits/rejected": -0.6142836809158325, | |
| "logps/chosen": -149.1907196044922, | |
| "logps/rejected": -94.28280639648438, | |
| "loss": 0.4284, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.2924773693084717, | |
| "rewards/margins": 1.1958118677139282, | |
| "rewards/rejected": -0.9033344984054565, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.17276130146847107, | |
| "grad_norm": 18.625837326049805, | |
| "learning_rate": 9.843015762136925e-07, | |
| "logits/chosen": -0.6587651371955872, | |
| "logits/rejected": -0.6321001052856445, | |
| "logps/chosen": -129.7299041748047, | |
| "logps/rejected": -85.21316528320312, | |
| "loss": 0.443, | |
| "rewards/accuracies": 0.7999999523162842, | |
| "rewards/chosen": 0.3374091684818268, | |
| "rewards/margins": 1.1282662153244019, | |
| "rewards/rejected": -0.7908569574356079, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.17852001151742009, | |
| "grad_norm": 19.16632843017578, | |
| "learning_rate": 9.817053976928643e-07, | |
| "logits/chosen": -0.6443223357200623, | |
| "logits/rejected": -0.6130384206771851, | |
| "logps/chosen": -137.69725036621094, | |
| "logps/rejected": -92.34024047851562, | |
| "loss": 0.4861, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": 0.2451004534959793, | |
| "rewards/margins": 1.0736701488494873, | |
| "rewards/rejected": -0.8285696506500244, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.18427872156636912, | |
| "grad_norm": 21.944595336914062, | |
| "learning_rate": 9.789146165530254e-07, | |
| "logits/chosen": -0.6652050018310547, | |
| "logits/rejected": -0.6271861791610718, | |
| "logps/chosen": -127.1378173828125, | |
| "logps/rejected": -81.23876953125, | |
| "loss": 0.444, | |
| "rewards/accuracies": 0.8156250715255737, | |
| "rewards/chosen": 0.20780594646930695, | |
| "rewards/margins": 1.1428509950637817, | |
| "rewards/rejected": -0.9350449442863464, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.19003743161531816, | |
| "grad_norm": 15.266040802001953, | |
| "learning_rate": 9.759303602328992e-07, | |
| "logits/chosen": -0.6632432341575623, | |
| "logits/rejected": -0.6333745718002319, | |
| "logps/chosen": -146.56114196777344, | |
| "logps/rejected": -89.61295318603516, | |
| "loss": 0.4436, | |
| "rewards/accuracies": 0.8093750476837158, | |
| "rewards/chosen": 0.305823415517807, | |
| "rewards/margins": 1.1472581624984741, | |
| "rewards/rejected": -0.8414347171783447, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1957961416642672, | |
| "grad_norm": 14.493436813354492, | |
| "learning_rate": 9.727538343326278e-07, | |
| "logits/chosen": -0.6710892915725708, | |
| "logits/rejected": -0.6370099782943726, | |
| "logps/chosen": -139.57029724121094, | |
| "logps/rejected": -87.5551986694336, | |
| "loss": 0.4235, | |
| "rewards/accuracies": 0.8031250238418579, | |
| "rewards/chosen": 0.4220184087753296, | |
| "rewards/margins": 1.233292579650879, | |
| "rewards/rejected": -0.8112741708755493, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.20155485171321624, | |
| "grad_norm": 33.87898635864258, | |
| "learning_rate": 9.693863221267237e-07, | |
| "logits/chosen": -0.6808286309242249, | |
| "logits/rejected": -0.6564449071884155, | |
| "logps/chosen": -127.93206024169922, | |
| "logps/rejected": -95.1683578491211, | |
| "loss": 0.4536, | |
| "rewards/accuracies": 0.815625011920929, | |
| "rewards/chosen": 0.3246816098690033, | |
| "rewards/margins": 1.1423108577728271, | |
| "rewards/rejected": -0.8176291584968567, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.20731356176216528, | |
| "grad_norm": 22.24034881591797, | |
| "learning_rate": 9.658291840456452e-07, | |
| "logits/chosen": -0.6467706561088562, | |
| "logits/rejected": -0.6211069822311401, | |
| "logps/chosen": -155.3069610595703, | |
| "logps/rejected": -108.99771118164062, | |
| "loss": 0.4981, | |
| "rewards/accuracies": 0.7781249284744263, | |
| "rewards/chosen": 0.42678219079971313, | |
| "rewards/margins": 1.1551151275634766, | |
| "rewards/rejected": -0.7283328175544739, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.21307227181111432, | |
| "grad_norm": 39.71334457397461, | |
| "learning_rate": 9.620838571261993e-07, | |
| "logits/chosen": -0.6781237125396729, | |
| "logits/rejected": -0.6576768159866333, | |
| "logps/chosen": -129.8390350341797, | |
| "logps/rejected": -101.4920425415039, | |
| "loss": 0.455, | |
| "rewards/accuracies": 0.8093750476837158, | |
| "rewards/chosen": 0.32165372371673584, | |
| "rewards/margins": 1.1231375932693481, | |
| "rewards/rejected": -0.8014839291572571, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.21883098186006333, | |
| "grad_norm": 18.181711196899414, | |
| "learning_rate": 9.581518544309992e-07, | |
| "logits/chosen": -0.6723761558532715, | |
| "logits/rejected": -0.6443379521369934, | |
| "logps/chosen": -118.37055969238281, | |
| "logps/rejected": -89.48719787597656, | |
| "loss": 0.4255, | |
| "rewards/accuracies": 0.809374988079071, | |
| "rewards/chosen": 0.2892196774482727, | |
| "rewards/margins": 1.1490939855575562, | |
| "rewards/rejected": -0.8598741888999939, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.22458969190901237, | |
| "grad_norm": 21.584299087524414, | |
| "learning_rate": 9.540347644372052e-07, | |
| "logits/chosen": -0.6740597486495972, | |
| "logits/rejected": -0.6414741277694702, | |
| "logps/chosen": -124.53128814697266, | |
| "logps/rejected": -91.60765838623047, | |
| "loss": 0.4104, | |
| "rewards/accuracies": 0.8187500834465027, | |
| "rewards/chosen": 0.2165013551712036, | |
| "rewards/margins": 1.1881834268569946, | |
| "rewards/rejected": -0.971682071685791, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2303484019579614, | |
| "grad_norm": 24.072954177856445, | |
| "learning_rate": 9.497342503948025e-07, | |
| "logits/chosen": -0.656568169593811, | |
| "logits/rejected": -0.6270979046821594, | |
| "logps/chosen": -142.2841339111328, | |
| "logps/rejected": -107.37252044677734, | |
| "loss": 0.3876, | |
| "rewards/accuracies": 0.8343750238418579, | |
| "rewards/chosen": 0.4219643473625183, | |
| "rewards/margins": 1.381853699684143, | |
| "rewards/rejected": -0.9598893523216248, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.23610711200691045, | |
| "grad_norm": 21.61984634399414, | |
| "learning_rate": 9.452520496546692e-07, | |
| "logits/chosen": -0.6732717156410217, | |
| "logits/rejected": -0.6305854916572571, | |
| "logps/chosen": -149.52734375, | |
| "logps/rejected": -107.50003814697266, | |
| "loss": 0.3872, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": 0.560570478439331, | |
| "rewards/margins": 1.6208797693252563, | |
| "rewards/rejected": -1.0603091716766357, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.2418658220558595, | |
| "grad_norm": 17.159812927246094, | |
| "learning_rate": 9.405899729667103e-07, | |
| "logits/chosen": -0.6888399720191956, | |
| "logits/rejected": -0.663253903388977, | |
| "logps/chosen": -126.42945861816406, | |
| "logps/rejected": -115.0486068725586, | |
| "loss": 0.5476, | |
| "rewards/accuracies": 0.8031249642372131, | |
| "rewards/chosen": 0.2148836851119995, | |
| "rewards/margins": 1.1695863008499146, | |
| "rewards/rejected": -0.954702615737915, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.24762453210480853, | |
| "grad_norm": 46.91326904296875, | |
| "learning_rate": 9.357499037483376e-07, | |
| "logits/chosen": -0.7161371111869812, | |
| "logits/rejected": -0.684617817401886, | |
| "logps/chosen": -141.0438232421875, | |
| "logps/rejected": -95.7398910522461, | |
| "loss": 0.407, | |
| "rewards/accuracies": 0.846875011920929, | |
| "rewards/chosen": 0.252617210149765, | |
| "rewards/margins": 1.3679397106170654, | |
| "rewards/rejected": -1.115322470664978, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.25338324215375757, | |
| "grad_norm": 12.841931343078613, | |
| "learning_rate": 9.307337973235949e-07, | |
| "logits/chosen": -0.708075225353241, | |
| "logits/rejected": -0.6804291605949402, | |
| "logps/chosen": -134.2688751220703, | |
| "logps/rejected": -88.66293334960938, | |
| "loss": 0.4054, | |
| "rewards/accuracies": 0.8281249403953552, | |
| "rewards/chosen": 0.3387967050075531, | |
| "rewards/margins": 1.3938863277435303, | |
| "rewards/rejected": -1.0550895929336548, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2591419522027066, | |
| "grad_norm": 29.414453506469727, | |
| "learning_rate": 9.255436801332324e-07, | |
| "logits/chosen": -0.7155076265335083, | |
| "logits/rejected": -0.6844953894615173, | |
| "logps/chosen": -128.78543090820312, | |
| "logps/rejected": -87.72601318359375, | |
| "loss": 0.4451, | |
| "rewards/accuracies": 0.8218749761581421, | |
| "rewards/chosen": 0.26765358448028564, | |
| "rewards/margins": 1.3901262283325195, | |
| "rewards/rejected": -1.1224726438522339, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.26490066225165565, | |
| "grad_norm": 39.492164611816406, | |
| "learning_rate": 9.201816489160516e-07, | |
| "logits/chosen": -0.7237324118614197, | |
| "logits/rejected": -0.705731987953186, | |
| "logps/chosen": -134.49008178710938, | |
| "logps/rejected": -108.36016845703125, | |
| "loss": 0.4056, | |
| "rewards/accuracies": 0.8375000357627869, | |
| "rewards/chosen": 0.34749743342399597, | |
| "rewards/margins": 1.4375081062316895, | |
| "rewards/rejected": -1.090010643005371, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.27065937230060466, | |
| "grad_norm": 10.968271255493164, | |
| "learning_rate": 9.146498698618506e-07, | |
| "logits/chosen": -0.7146254777908325, | |
| "logits/rejected": -0.6773239374160767, | |
| "logps/chosen": -142.56112670898438, | |
| "logps/rejected": -97.36111450195312, | |
| "loss": 0.385, | |
| "rewards/accuracies": 0.8343750238418579, | |
| "rewards/chosen": 0.3780550956726074, | |
| "rewards/margins": 1.5382754802703857, | |
| "rewards/rejected": -1.1602205038070679, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2764180823495537, | |
| "grad_norm": 20.968557357788086, | |
| "learning_rate": 9.089505777363112e-07, | |
| "logits/chosen": -0.7134730219841003, | |
| "logits/rejected": -0.6850117444992065, | |
| "logps/chosen": -139.8096923828125, | |
| "logps/rejected": -98.57595825195312, | |
| "loss": 0.4562, | |
| "rewards/accuracies": 0.7906249761581421, | |
| "rewards/chosen": 0.22018523514270782, | |
| "rewards/margins": 1.3545597791671753, | |
| "rewards/rejected": -1.1343746185302734, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.28217679239850274, | |
| "grad_norm": 19.068077087402344, | |
| "learning_rate": 9.030860749781846e-07, | |
| "logits/chosen": -0.7155986428260803, | |
| "logits/rejected": -0.6849885582923889, | |
| "logps/chosen": -138.48265075683594, | |
| "logps/rejected": -111.72834014892578, | |
| "loss": 0.4645, | |
| "rewards/accuracies": 0.7906250357627869, | |
| "rewards/chosen": 0.24146807193756104, | |
| "rewards/margins": 1.4533673524856567, | |
| "rewards/rejected": -1.2118991613388062, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.28793550244745175, | |
| "grad_norm": 24.932621002197266, | |
| "learning_rate": 8.970587307691355e-07, | |
| "logits/chosen": -0.6751305460929871, | |
| "logits/rejected": -0.6510910987854004, | |
| "logps/chosen": -137.3748321533203, | |
| "logps/rejected": -99.03021240234375, | |
| "loss": 0.5792, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": 0.15273617208003998, | |
| "rewards/margins": 1.3070764541625977, | |
| "rewards/rejected": -1.1543402671813965, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2936942124964008, | |
| "grad_norm": 27.17708969116211, | |
| "learning_rate": 8.908709800766236e-07, | |
| "logits/chosen": -0.6968642473220825, | |
| "logits/rejected": -0.6686012744903564, | |
| "logps/chosen": -134.98231506347656, | |
| "logps/rejected": -92.57615661621094, | |
| "loss": 0.4038, | |
| "rewards/accuracies": 0.8156250715255737, | |
| "rewards/chosen": 0.13865375518798828, | |
| "rewards/margins": 1.3224716186523438, | |
| "rewards/rejected": -1.1838178634643555, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.29945292254534983, | |
| "grad_norm": 14.130817413330078, | |
| "learning_rate": 8.845253226702103e-07, | |
| "logits/chosen": -0.6665252447128296, | |
| "logits/rejected": -0.6443254947662354, | |
| "logps/chosen": -136.849609375, | |
| "logps/rejected": -116.30662536621094, | |
| "loss": 0.4034, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.3671894669532776, | |
| "rewards/margins": 1.4308054447174072, | |
| "rewards/rejected": -1.0636159181594849, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.3052116325942989, | |
| "grad_norm": 19.757686614990234, | |
| "learning_rate": 8.780243221116837e-07, | |
| "logits/chosen": -0.7094990015029907, | |
| "logits/rejected": -0.6770952939987183, | |
| "logps/chosen": -124.83984375, | |
| "logps/rejected": -87.22465515136719, | |
| "loss": 0.3836, | |
| "rewards/accuracies": 0.8437500596046448, | |
| "rewards/chosen": 0.23798859119415283, | |
| "rewards/margins": 1.3941946029663086, | |
| "rewards/rejected": -1.1562058925628662, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3109703426432479, | |
| "grad_norm": 24.308874130249023, | |
| "learning_rate": 8.713706047194135e-07, | |
| "logits/chosen": -0.6967591047286987, | |
| "logits/rejected": -0.6650117635726929, | |
| "logps/chosen": -136.66961669921875, | |
| "logps/rejected": -92.86897277832031, | |
| "loss": 0.4288, | |
| "rewards/accuracies": 0.8031250238418579, | |
| "rewards/chosen": 0.1572980284690857, | |
| "rewards/margins": 1.3664262294769287, | |
| "rewards/rejected": -1.2091283798217773, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.3167290526921969, | |
| "grad_norm": 22.169261932373047, | |
| "learning_rate": 8.645668585073538e-07, | |
| "logits/chosen": -0.7082847356796265, | |
| "logits/rejected": -0.6733636260032654, | |
| "logps/chosen": -123.37152099609375, | |
| "logps/rejected": -85.6908187866211, | |
| "loss": 0.3699, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": 0.24349649250507355, | |
| "rewards/margins": 1.6746883392333984, | |
| "rewards/rejected": -1.4311916828155518, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.322487762741146, | |
| "grad_norm": 16.986181259155273, | |
| "learning_rate": 8.576158320991204e-07, | |
| "logits/chosen": -0.7158868908882141, | |
| "logits/rejected": -0.6811345815658569, | |
| "logps/chosen": -135.4944610595703, | |
| "logps/rejected": -91.58667755126953, | |
| "loss": 0.4072, | |
| "rewards/accuracies": 0.7968749403953552, | |
| "rewards/chosen": 0.31856077909469604, | |
| "rewards/margins": 1.5791938304901123, | |
| "rewards/rejected": -1.2606329917907715, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.328246472790095, | |
| "grad_norm": 17.154346466064453, | |
| "learning_rate": 8.505203336175835e-07, | |
| "logits/chosen": -0.70228111743927, | |
| "logits/rejected": -0.6674139499664307, | |
| "logps/chosen": -140.62643432617188, | |
| "logps/rejected": -104.01993560791016, | |
| "loss": 0.4011, | |
| "rewards/accuracies": 0.809374988079071, | |
| "rewards/chosen": 0.26829949021339417, | |
| "rewards/margins": 1.6834442615509033, | |
| "rewards/rejected": -1.415144920349121, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.33400518283904407, | |
| "grad_norm": 44.71592712402344, | |
| "learning_rate": 8.432832295504223e-07, | |
| "logits/chosen": -0.7484235763549805, | |
| "logits/rejected": -0.7252366542816162, | |
| "logps/chosen": -130.38858032226562, | |
| "logps/rejected": -97.56017303466797, | |
| "loss": 0.4027, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": 0.21075472235679626, | |
| "rewards/margins": 1.5023763179779053, | |
| "rewards/rejected": -1.2916215658187866, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3397638928879931, | |
| "grad_norm": 18.29545021057129, | |
| "learning_rate": 8.359074435921031e-07, | |
| "logits/chosen": -0.7082387208938599, | |
| "logits/rejected": -0.672591507434845, | |
| "logps/chosen": -140.56137084960938, | |
| "logps/rejected": -97.13379669189453, | |
| "loss": 0.3661, | |
| "rewards/accuracies": 0.8562499284744263, | |
| "rewards/chosen": 0.36341509222984314, | |
| "rewards/margins": 1.8082225322723389, | |
| "rewards/rejected": -1.4448072910308838, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.34552260293694215, | |
| "grad_norm": 14.033991813659668, | |
| "learning_rate": 8.283959554627446e-07, | |
| "logits/chosen": -0.716846764087677, | |
| "logits/rejected": -0.6769986748695374, | |
| "logps/chosen": -142.1970977783203, | |
| "logps/rejected": -102.24588775634766, | |
| "loss": 0.3741, | |
| "rewards/accuracies": 0.8625000715255737, | |
| "rewards/chosen": 0.10098960995674133, | |
| "rewards/margins": 1.6507571935653687, | |
| "rewards/rejected": -1.5497674942016602, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.35128131298589116, | |
| "grad_norm": 13.405789375305176, | |
| "learning_rate": 8.207517997043504e-07, | |
| "logits/chosen": -0.7140165567398071, | |
| "logits/rejected": -0.6859545111656189, | |
| "logps/chosen": -144.771240234375, | |
| "logps/rejected": -104.20320892333984, | |
| "loss": 0.3605, | |
| "rewards/accuracies": 0.8343750238418579, | |
| "rewards/chosen": 0.20804497599601746, | |
| "rewards/margins": 1.7361663579940796, | |
| "rewards/rejected": -1.5281215906143188, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.35704002303484017, | |
| "grad_norm": 15.424454689025879, | |
| "learning_rate": 8.129780644548938e-07, | |
| "logits/chosen": -0.7223767042160034, | |
| "logits/rejected": -0.6883643269538879, | |
| "logps/chosen": -130.39566040039062, | |
| "logps/rejected": -103.71538543701172, | |
| "loss": 0.3571, | |
| "rewards/accuracies": 0.8375000357627869, | |
| "rewards/chosen": 0.09431158006191254, | |
| "rewards/margins": 1.7663012742996216, | |
| "rewards/rejected": -1.6719896793365479, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.36279873308378924, | |
| "grad_norm": 15.335771560668945, | |
| "learning_rate": 8.05077890200752e-07, | |
| "logits/chosen": -0.7171313166618347, | |
| "logits/rejected": -0.6881922483444214, | |
| "logps/chosen": -151.4913787841797, | |
| "logps/rejected": -98.9349594116211, | |
| "loss": 0.3646, | |
| "rewards/accuracies": 0.8312499523162842, | |
| "rewards/chosen": 0.1370360106229782, | |
| "rewards/margins": 1.744744062423706, | |
| "rewards/rejected": -1.607708215713501, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.36855744313273825, | |
| "grad_norm": 17.60926628112793, | |
| "learning_rate": 7.970544685079894e-07, | |
| "logits/chosen": -0.6996970772743225, | |
| "logits/rejected": -0.6725396513938904, | |
| "logps/chosen": -138.51010131835938, | |
| "logps/rejected": -98.81480407714844, | |
| "loss": 0.3743, | |
| "rewards/accuracies": 0.8375000357627869, | |
| "rewards/chosen": 0.06485229730606079, | |
| "rewards/margins": 1.6162770986557007, | |
| "rewards/rejected": -1.5514247417449951, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.3743161531816873, | |
| "grad_norm": 14.755389213562012, | |
| "learning_rate": 7.889110407330083e-07, | |
| "logits/chosen": -0.7361860275268555, | |
| "logits/rejected": -0.7013921737670898, | |
| "logps/chosen": -141.9473876953125, | |
| "logps/rejected": -101.18817138671875, | |
| "loss": 0.3477, | |
| "rewards/accuracies": 0.8343750238418579, | |
| "rewards/chosen": 0.34291204810142517, | |
| "rewards/margins": 1.7922308444976807, | |
| "rewards/rejected": -1.4493186473846436, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.38007486323063633, | |
| "grad_norm": 20.350278854370117, | |
| "learning_rate": 7.806508967130836e-07, | |
| "logits/chosen": -0.7531692981719971, | |
| "logits/rejected": -0.7267601490020752, | |
| "logps/chosen": -134.09832763671875, | |
| "logps/rejected": -99.92311096191406, | |
| "loss": 0.3806, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": 0.0986885130405426, | |
| "rewards/margins": 1.6266651153564453, | |
| "rewards/rejected": -1.5279765129089355, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3858335732795854, | |
| "grad_norm": 20.015928268432617, | |
| "learning_rate": 7.722773734373113e-07, | |
| "logits/chosen": -0.7297524213790894, | |
| "logits/rejected": -0.6996749639511108, | |
| "logps/chosen": -136.6656951904297, | |
| "logps/rejected": -103.38198852539062, | |
| "loss": 0.3441, | |
| "rewards/accuracies": 0.8687500357627869, | |
| "rewards/chosen": 0.19155237078666687, | |
| "rewards/margins": 1.881716012954712, | |
| "rewards/rejected": -1.690163493156433, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3915922833285344, | |
| "grad_norm": 15.462444305419922, | |
| "learning_rate": 7.637938536985099e-07, | |
| "logits/chosen": -0.753920316696167, | |
| "logits/rejected": -0.7297205924987793, | |
| "logps/chosen": -129.48484802246094, | |
| "logps/rejected": -100.32254028320312, | |
| "loss": 0.406, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": 0.016594011336565018, | |
| "rewards/margins": 1.5454182624816895, | |
| "rewards/rejected": -1.5288242101669312, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3973509933774834, | |
| "grad_norm": 11.136063575744629, | |
| "learning_rate": 7.552037647266157e-07, | |
| "logits/chosen": -0.7570206522941589, | |
| "logits/rejected": -0.7179579734802246, | |
| "logps/chosen": -144.1813507080078, | |
| "logps/rejected": -103.70523834228516, | |
| "loss": 0.3538, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.2709888219833374, | |
| "rewards/margins": 1.8022199869155884, | |
| "rewards/rejected": -1.5312312841415405, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.4031097034264325, | |
| "grad_norm": 29.570873260498047, | |
| "learning_rate": 7.465105768041282e-07, | |
| "logits/chosen": -0.7303261160850525, | |
| "logits/rejected": -0.6878946423530579, | |
| "logps/chosen": -146.9130401611328, | |
| "logps/rejected": -101.49455261230469, | |
| "loss": 0.3586, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.23865552246570587, | |
| "rewards/margins": 1.850990891456604, | |
| "rewards/rejected": -1.6123353242874146, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4088684134753815, | |
| "grad_norm": 14.321788787841797, | |
| "learning_rate": 7.377178018641613e-07, | |
| "logits/chosen": -0.7625525593757629, | |
| "logits/rejected": -0.7228215932846069, | |
| "logps/chosen": -122.93366241455078, | |
| "logps/rejected": -191.53184509277344, | |
| "loss": 0.3407, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.17545101046562195, | |
| "rewards/margins": 2.5035836696624756, | |
| "rewards/rejected": -2.3281326293945312, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.41462712352433057, | |
| "grad_norm": 26.58182144165039, | |
| "learning_rate": 7.288289920716685e-07, | |
| "logits/chosen": -0.7637885808944702, | |
| "logits/rejected": -0.7382663488388062, | |
| "logps/chosen": -113.6340560913086, | |
| "logps/rejected": -94.61100769042969, | |
| "loss": 0.3549, | |
| "rewards/accuracies": 0.8593750596046448, | |
| "rewards/chosen": -0.030434802174568176, | |
| "rewards/margins": 1.6987662315368652, | |
| "rewards/rejected": -1.7292009592056274, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4203858335732796, | |
| "grad_norm": 20.130050659179688, | |
| "learning_rate": 7.198477383884161e-07, | |
| "logits/chosen": -0.7368783950805664, | |
| "logits/rejected": -0.7132025957107544, | |
| "logps/chosen": -129.5439910888672, | |
| "logps/rejected": -99.97601318359375, | |
| "loss": 0.3928, | |
| "rewards/accuracies": 0.8218749761581421, | |
| "rewards/chosen": 0.08906654268503189, | |
| "rewards/margins": 1.758063793182373, | |
| "rewards/rejected": -1.6689971685409546, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.42614454362222864, | |
| "grad_norm": 15.920429229736328, | |
| "learning_rate": 7.107776691222802e-07, | |
| "logits/chosen": -0.7325925827026367, | |
| "logits/rejected": -0.7019511461257935, | |
| "logps/chosen": -119.99696350097656, | |
| "logps/rejected": -93.21834564208984, | |
| "loss": 0.4016, | |
| "rewards/accuracies": 0.8187500238418579, | |
| "rewards/chosen": 0.09440048038959503, | |
| "rewards/margins": 1.7340251207351685, | |
| "rewards/rejected": -1.639624834060669, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.43190325367117766, | |
| "grad_norm": 21.42776107788086, | |
| "learning_rate": 7.016224484614608e-07, | |
| "logits/chosen": -0.6936213970184326, | |
| "logits/rejected": -0.6615394949913025, | |
| "logps/chosen": -131.905029296875, | |
| "logps/rejected": -94.2560806274414, | |
| "loss": 0.4054, | |
| "rewards/accuracies": 0.8187499642372131, | |
| "rewards/chosen": 0.23300418257713318, | |
| "rewards/margins": 1.7516708374023438, | |
| "rewards/rejected": -1.5186666250228882, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.43766196372012667, | |
| "grad_norm": 15.192195892333984, | |
| "learning_rate": 6.923857749941959e-07, | |
| "logits/chosen": -0.7206861972808838, | |
| "logits/rejected": -0.6929584741592407, | |
| "logps/chosen": -129.8375701904297, | |
| "logps/rejected": -100.96007537841797, | |
| "loss": 0.3734, | |
| "rewards/accuracies": 0.8156249523162842, | |
| "rewards/chosen": 0.22579814493656158, | |
| "rewards/margins": 1.7992628812789917, | |
| "rewards/rejected": -1.5734646320343018, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.44342067376907573, | |
| "grad_norm": 10.402118682861328, | |
| "learning_rate": 6.830713802145818e-07, | |
| "logits/chosen": -0.6799423098564148, | |
| "logits/rejected": -0.6525387763977051, | |
| "logps/chosen": -154.219482421875, | |
| "logps/rejected": -114.71685028076172, | |
| "loss": 0.4382, | |
| "rewards/accuracies": 0.8281250596046448, | |
| "rewards/chosen": 0.4146007299423218, | |
| "rewards/margins": 1.8844369649887085, | |
| "rewards/rejected": -1.4698363542556763, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.44917938381802475, | |
| "grad_norm": 10.21727466583252, | |
| "learning_rate": 6.736830270150991e-07, | |
| "logits/chosen": -0.6829798221588135, | |
| "logits/rejected": -0.6589241623878479, | |
| "logps/chosen": -121.01963806152344, | |
| "logps/rejected": -97.19430541992188, | |
| "loss": 0.3348, | |
| "rewards/accuracies": 0.8499999642372131, | |
| "rewards/chosen": 0.27119049429893494, | |
| "rewards/margins": 1.897121548652649, | |
| "rewards/rejected": -1.6259310245513916, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4549380938669738, | |
| "grad_norm": 20.992265701293945, | |
| "learning_rate": 6.642245081664522e-07, | |
| "logits/chosen": -0.6706294417381287, | |
| "logits/rejected": -0.6368067860603333, | |
| "logps/chosen": -137.04710388183594, | |
| "logps/rejected": -98.4258041381836, | |
| "loss": 0.386, | |
| "rewards/accuracies": 0.8375000357627869, | |
| "rewards/chosen": 0.18035945296287537, | |
| "rewards/margins": 1.880584955215454, | |
| "rewards/rejected": -1.7002257108688354, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4606968039159228, | |
| "grad_norm": 18.026098251342773, | |
| "learning_rate": 6.54699644785342e-07, | |
| "logits/chosen": -0.6860750913619995, | |
| "logits/rejected": -0.6613771319389343, | |
| "logps/chosen": -136.12977600097656, | |
| "logps/rejected": -94.88777160644531, | |
| "loss": 0.374, | |
| "rewards/accuracies": 0.840624988079071, | |
| "rewards/chosen": 0.26585477590560913, | |
| "rewards/margins": 1.7964979410171509, | |
| "rewards/rejected": -1.5306432247161865, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4664555139648719, | |
| "grad_norm": 12.507932662963867, | |
| "learning_rate": 6.451122847907842e-07, | |
| "logits/chosen": -0.6504511833190918, | |
| "logits/rejected": -0.6260837912559509, | |
| "logps/chosen": -130.69212341308594, | |
| "logps/rejected": -99.03807830810547, | |
| "loss": 0.3886, | |
| "rewards/accuracies": 0.8468750715255737, | |
| "rewards/chosen": 0.2847888469696045, | |
| "rewards/margins": 1.795453667640686, | |
| "rewards/rejected": -1.5106645822525024, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.4722142240138209, | |
| "grad_norm": 21.029708862304688, | |
| "learning_rate": 6.354663013496005e-07, | |
| "logits/chosen": -0.6501750946044922, | |
| "logits/rejected": -0.6221209764480591, | |
| "logps/chosen": -147.17929077148438, | |
| "logps/rejected": -126.30306243896484, | |
| "loss": 0.38, | |
| "rewards/accuracies": 0.8562500476837158, | |
| "rewards/chosen": 0.4969615340232849, | |
| "rewards/margins": 1.870727777481079, | |
| "rewards/rejected": -1.373766303062439, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.4779729340627699, | |
| "grad_norm": 15.563918113708496, | |
| "learning_rate": 6.257655913117117e-07, | |
| "logits/chosen": -0.6645382642745972, | |
| "logits/rejected": -0.6453496217727661, | |
| "logps/chosen": -128.99713134765625, | |
| "logps/rejected": -102.27412414550781, | |
| "loss": 0.3668, | |
| "rewards/accuracies": 0.8531250357627869, | |
| "rewards/chosen": 0.3891734480857849, | |
| "rewards/margins": 1.8439801931381226, | |
| "rewards/rejected": -1.4548066854476929, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.483731644111719, | |
| "grad_norm": 9.265098571777344, | |
| "learning_rate": 6.160140736358599e-07, | |
| "logits/chosen": -0.7049685120582581, | |
| "logits/rejected": -0.6709137558937073, | |
| "logps/chosen": -139.44696044921875, | |
| "logps/rejected": -107.42019653320312, | |
| "loss": 0.3237, | |
| "rewards/accuracies": 0.8656250238418579, | |
| "rewards/chosen": 0.42667171359062195, | |
| "rewards/margins": 1.9013292789459229, | |
| "rewards/rejected": -1.4746575355529785, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.489490354160668, | |
| "grad_norm": 15.329190254211426, | |
| "learning_rate": 6.062156878064025e-07, | |
| "logits/chosen": -0.6818621158599854, | |
| "logits/rejected": -0.6550964713096619, | |
| "logps/chosen": -122.67804718017578, | |
| "logps/rejected": -90.76681518554688, | |
| "loss": 0.3436, | |
| "rewards/accuracies": 0.8437500596046448, | |
| "rewards/chosen": 0.35221126675605774, | |
| "rewards/margins": 1.8415284156799316, | |
| "rewards/rejected": -1.4893171787261963, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.49524906420961706, | |
| "grad_norm": 11.879908561706543, | |
| "learning_rate": 5.963743922418121e-07, | |
| "logits/chosen": -0.6924838423728943, | |
| "logits/rejected": -0.6654370427131653, | |
| "logps/chosen": -131.25909423828125, | |
| "logps/rejected": -95.65048217773438, | |
| "loss": 0.3591, | |
| "rewards/accuracies": 0.846875011920929, | |
| "rewards/chosen": 0.36284345388412476, | |
| "rewards/margins": 1.8446120023727417, | |
| "rewards/rejected": -1.4817683696746826, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5010077742585661, | |
| "grad_norm": 24.157817840576172, | |
| "learning_rate": 5.864941626955274e-07, | |
| "logits/chosen": -0.7024100422859192, | |
| "logits/rejected": -0.6722136735916138, | |
| "logps/chosen": -128.6143035888672, | |
| "logps/rejected": -91.6454849243164, | |
| "loss": 0.3274, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/chosen": 0.4442964494228363, | |
| "rewards/margins": 1.9124963283538818, | |
| "rewards/rejected": -1.4681998491287231, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.5067664843075151, | |
| "grad_norm": 24.65102767944336, | |
| "learning_rate": 5.765789906498015e-07, | |
| "logits/chosen": -0.6772369146347046, | |
| "logits/rejected": -0.6518146991729736, | |
| "logps/chosen": -120.58905792236328, | |
| "logps/rejected": -93.23050689697266, | |
| "loss": 0.4348, | |
| "rewards/accuracies": 0.7968749403953552, | |
| "rewards/chosen": 0.1704983413219452, | |
| "rewards/margins": 1.723240852355957, | |
| "rewards/rejected": -1.5527423620224, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5125251943564642, | |
| "grad_norm": 21.338768005371094, | |
| "learning_rate": 5.666328817031957e-07, | |
| "logits/chosen": -0.6858566403388977, | |
| "logits/rejected": -0.6644458770751953, | |
| "logps/chosen": -143.04592895507812, | |
| "logps/rejected": -112.9994125366211, | |
| "loss": 0.3478, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": 0.39247894287109375, | |
| "rewards/margins": 1.9909181594848633, | |
| "rewards/rejected": -1.598439335823059, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.5182839044054132, | |
| "grad_norm": 22.700468063354492, | |
| "learning_rate": 5.56659853952371e-07, | |
| "logits/chosen": -0.6734040975570679, | |
| "logits/rejected": -0.6462678909301758, | |
| "logps/chosen": -126.01319885253906, | |
| "logps/rejected": -101.61735534667969, | |
| "loss": 0.3346, | |
| "rewards/accuracies": 0.8406249284744263, | |
| "rewards/chosen": 0.2884215712547302, | |
| "rewards/margins": 1.878092646598816, | |
| "rewards/rejected": -1.589671015739441, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5240426144543622, | |
| "grad_norm": 14.079261779785156, | |
| "learning_rate": 5.466639363688295e-07, | |
| "logits/chosen": -0.6971774101257324, | |
| "logits/rejected": -0.6686007976531982, | |
| "logps/chosen": -136.96533203125, | |
| "logps/rejected": -98.08868408203125, | |
| "loss": 0.3788, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/chosen": 0.458240270614624, | |
| "rewards/margins": 1.9593617916107178, | |
| "rewards/rejected": -1.5011215209960938, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5298013245033113, | |
| "grad_norm": 9.099288940429688, | |
| "learning_rate": 5.366491671712641e-07, | |
| "logits/chosen": -0.6674658060073853, | |
| "logits/rejected": -0.6430867910385132, | |
| "logps/chosen": -194.9149169921875, | |
| "logps/rejected": -105.85720825195312, | |
| "loss": 0.4739, | |
| "rewards/accuracies": 0.8687500357627869, | |
| "rewards/chosen": 0.2768912613391876, | |
| "rewards/margins": 1.7589211463928223, | |
| "rewards/rejected": -1.482029914855957, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5355600345522603, | |
| "grad_norm": 15.008882522583008, | |
| "learning_rate": 5.266195921941696e-07, | |
| "logits/chosen": -0.6785784363746643, | |
| "logits/rejected": -0.6494132280349731, | |
| "logps/chosen": -132.4415740966797, | |
| "logps/rejected": -93.6052017211914, | |
| "loss": 0.2967, | |
| "rewards/accuracies": 0.890625, | |
| "rewards/chosen": 0.5966256260871887, | |
| "rewards/margins": 2.0661702156066895, | |
| "rewards/rejected": -1.469544768333435, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5413187446012093, | |
| "grad_norm": 15.97289752960205, | |
| "learning_rate": 5.165792632533811e-07, | |
| "logits/chosen": -0.6924957036972046, | |
| "logits/rejected": -0.6638644337654114, | |
| "logps/chosen": -139.21484375, | |
| "logps/rejected": -106.23296356201172, | |
| "loss": 0.375, | |
| "rewards/accuracies": 0.8312499523162842, | |
| "rewards/chosen": 0.31263190507888794, | |
| "rewards/margins": 1.9060099124908447, | |
| "rewards/rejected": -1.593377947807312, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5470774546501583, | |
| "grad_norm": 12.529922485351562, | |
| "learning_rate": 5.065322365091928e-07, | |
| "logits/chosen": -0.6531215310096741, | |
| "logits/rejected": -0.6270167231559753, | |
| "logps/chosen": -126.1975326538086, | |
| "logps/rejected": -94.7371826171875, | |
| "loss": 0.3378, | |
| "rewards/accuracies": 0.8687499761581421, | |
| "rewards/chosen": 0.16207355260849, | |
| "rewards/margins": 1.917543649673462, | |
| "rewards/rejected": -1.7554700374603271, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5528361646991073, | |
| "grad_norm": 12.316908836364746, | |
| "learning_rate": 4.964825708277229e-07, | |
| "logits/chosen": -0.6698362231254578, | |
| "logits/rejected": -0.6397368311882019, | |
| "logps/chosen": -142.1855926513672, | |
| "logps/rejected": -107.25379943847656, | |
| "loss": 0.3183, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/chosen": 0.49424219131469727, | |
| "rewards/margins": 2.0777647495269775, | |
| "rewards/rejected": -1.5835226774215698, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5585948747480565, | |
| "grad_norm": 20.065715789794922, | |
| "learning_rate": 4.864343261411856e-07, | |
| "logits/chosen": -0.706594705581665, | |
| "logits/rejected": -0.6800837516784668, | |
| "logps/chosen": -136.40191650390625, | |
| "logps/rejected": -96.95333862304688, | |
| "loss": 0.338, | |
| "rewards/accuracies": 0.8562500476837158, | |
| "rewards/chosen": 0.36437052488327026, | |
| "rewards/margins": 2.0314860343933105, | |
| "rewards/rejected": -1.6671154499053955, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5643535847970055, | |
| "grad_norm": 15.975777626037598, | |
| "learning_rate": 4.763915618077319e-07, | |
| "logits/chosen": -0.7158640623092651, | |
| "logits/rejected": -0.6942066550254822, | |
| "logps/chosen": -131.8379669189453, | |
| "logps/rejected": -107.44449615478516, | |
| "loss": 0.3128, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/chosen": 0.2980409562587738, | |
| "rewards/margins": 2.096712827682495, | |
| "rewards/rejected": -1.798672080039978, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5701122948459545, | |
| "grad_norm": 17.336626052856445, | |
| "learning_rate": 4.6635833497152217e-07, | |
| "logits/chosen": -0.7176687121391296, | |
| "logits/rejected": -0.6980822086334229, | |
| "logps/chosen": -134.8371124267578, | |
| "logps/rejected": -111.04306030273438, | |
| "loss": 0.3319, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": 0.4131332337856293, | |
| "rewards/margins": 2.0368969440460205, | |
| "rewards/rejected": -1.6237636804580688, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5758710048949035, | |
| "grad_norm": 23.855201721191406, | |
| "learning_rate": 4.5633869892369436e-07, | |
| "logits/chosen": -0.6962485313415527, | |
| "logits/rejected": -0.6787732839584351, | |
| "logps/chosen": -142.33580017089844, | |
| "logps/rejected": -106.88719940185547, | |
| "loss": 0.3622, | |
| "rewards/accuracies": 0.8437499403953552, | |
| "rewards/chosen": 0.36459341645240784, | |
| "rewards/margins": 1.871263027191162, | |
| "rewards/rejected": -1.5066696405410767, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5816297149438526, | |
| "grad_norm": 21.980791091918945, | |
| "learning_rate": 4.463367014648871e-07, | |
| "logits/chosen": -0.6853346824645996, | |
| "logits/rejected": -0.6605929732322693, | |
| "logps/chosen": -126.45109558105469, | |
| "logps/rejected": -87.18302154541016, | |
| "loss": 0.3155, | |
| "rewards/accuracies": 0.8687500357627869, | |
| "rewards/chosen": 0.4707247316837311, | |
| "rewards/margins": 2.1418490409851074, | |
| "rewards/rejected": -1.6711241006851196, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5873884249928016, | |
| "grad_norm": 18.08453369140625, | |
| "learning_rate": 4.3635638326998194e-07, | |
| "logits/chosen": -0.7209752202033997, | |
| "logits/rejected": -0.6910983920097351, | |
| "logps/chosen": -144.9892120361328, | |
| "logps/rejected": -90.99137115478516, | |
| "loss": 0.3438, | |
| "rewards/accuracies": 0.8812500238418579, | |
| "rewards/chosen": 0.30342555046081543, | |
| "rewards/margins": 2.0735397338867188, | |
| "rewards/rejected": -1.7701140642166138, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5931471350417507, | |
| "grad_norm": 19.45905876159668, | |
| "learning_rate": 4.264017762557244e-07, | |
| "logits/chosen": -0.6922816634178162, | |
| "logits/rejected": -0.6663939952850342, | |
| "logps/chosen": -129.85174560546875, | |
| "logps/rejected": -93.9735336303711, | |
| "loss": 0.3056, | |
| "rewards/accuracies": 0.8718750476837158, | |
| "rewards/chosen": 0.280218243598938, | |
| "rewards/margins": 2.1616480350494385, | |
| "rewards/rejected": -1.8814294338226318, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5989058450906997, | |
| "grad_norm": 23.51837730407715, | |
| "learning_rate": 4.1647690195188225e-07, | |
| "logits/chosen": -0.7207853198051453, | |
| "logits/rejected": -0.6952850818634033, | |
| "logps/chosen": -129.96766662597656, | |
| "logps/rejected": -103.2422103881836, | |
| "loss": 0.3515, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.19336190819740295, | |
| "rewards/margins": 2.06461238861084, | |
| "rewards/rejected": -1.8712507486343384, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.6046645551396487, | |
| "grad_norm": 10.306842803955078, | |
| "learning_rate": 4.0658576987660154e-07, | |
| "logits/chosen": -0.7156558036804199, | |
| "logits/rejected": -0.6903259754180908, | |
| "logps/chosen": -128.18174743652344, | |
| "logps/rejected": -92.14160919189453, | |
| "loss": 0.2842, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.19515244662761688, | |
| "rewards/margins": 2.040672779083252, | |
| "rewards/rejected": -1.845520257949829, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.6104232651885978, | |
| "grad_norm": 30.55817413330078, | |
| "learning_rate": 3.9673237591661265e-07, | |
| "logits/chosen": -0.7145802974700928, | |
| "logits/rejected": -0.6979396343231201, | |
| "logps/chosen": -132.6157989501953, | |
| "logps/rejected": -100.8826675415039, | |
| "loss": 0.3722, | |
| "rewards/accuracies": 0.856249988079071, | |
| "rewards/chosen": 0.2976403832435608, | |
| "rewards/margins": 2.033444881439209, | |
| "rewards/rejected": -1.735804557800293, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.6161819752375468, | |
| "grad_norm": 15.014007568359375, | |
| "learning_rate": 3.8692070071294623e-07, | |
| "logits/chosen": -0.6951994299888611, | |
| "logits/rejected": -0.6723592281341553, | |
| "logps/chosen": -138.485107421875, | |
| "logps/rejected": -107.72498321533203, | |
| "loss": 0.3226, | |
| "rewards/accuracies": 0.8812500238418579, | |
| "rewards/chosen": 0.159155011177063, | |
| "rewards/margins": 2.102383613586426, | |
| "rewards/rejected": -1.9432284832000732, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6219406852864958, | |
| "grad_norm": 17.77401351928711, | |
| "learning_rate": 3.7715470805280694e-07, | |
| "logits/chosen": -0.7224422097206116, | |
| "logits/rejected": -0.7026802897453308, | |
| "logps/chosen": -128.47036743164062, | |
| "logps/rejected": -101.08393859863281, | |
| "loss": 0.3732, | |
| "rewards/accuracies": 0.8499999642372131, | |
| "rewards/chosen": 0.11434885859489441, | |
| "rewards/margins": 1.996582269668579, | |
| "rewards/rejected": -1.8822333812713623, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6276993953354448, | |
| "grad_norm": 17.239843368530273, | |
| "learning_rate": 3.6743834326825384e-07, | |
| "logits/chosen": -0.7175098657608032, | |
| "logits/rejected": -0.6943240761756897, | |
| "logps/chosen": -146.55691528320312, | |
| "logps/rejected": -115.63310241699219, | |
| "loss": 0.34, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": 0.2911383807659149, | |
| "rewards/margins": 2.0382208824157715, | |
| "rewards/rejected": -1.7470825910568237, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.6334581053843938, | |
| "grad_norm": 14.190447807312012, | |
| "learning_rate": 3.577755316423411e-07, | |
| "logits/chosen": -0.7407609820365906, | |
| "logits/rejected": -0.7160014510154724, | |
| "logps/chosen": -133.70611572265625, | |
| "logps/rejected": -110.30499267578125, | |
| "loss": 0.3647, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.23839513957500458, | |
| "rewards/margins": 1.9629977941513062, | |
| "rewards/rejected": -1.7246026992797852, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.639216815433343, | |
| "grad_norm": 20.399852752685547, | |
| "learning_rate": 3.481701768233532e-07, | |
| "logits/chosen": -0.715621829032898, | |
| "logits/rejected": -0.6828228235244751, | |
| "logps/chosen": -127.63819885253906, | |
| "logps/rejected": -189.13275146484375, | |
| "loss": 0.5705, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.16459399461746216, | |
| "rewards/margins": 1.7133777141571045, | |
| "rewards/rejected": -1.5487836599349976, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.644975525482292, | |
| "grad_norm": 34.342124938964844, | |
| "learning_rate": 3.386261592477832e-07, | |
| "logits/chosen": -0.7274751663208008, | |
| "logits/rejected": -0.7126445770263672, | |
| "logps/chosen": -124.43698120117188, | |
| "logps/rejected": -102.45167541503906, | |
| "loss": 0.342, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.3266334533691406, | |
| "rewards/margins": 2.0077967643737793, | |
| "rewards/rejected": -1.6811633110046387, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.650734235531241, | |
| "grad_norm": 26.605039596557617, | |
| "learning_rate": 3.2914733457268874e-07, | |
| "logits/chosen": -0.7203111052513123, | |
| "logits/rejected": -0.7033373713493347, | |
| "logps/chosen": -120.60269165039062, | |
| "logps/rejected": -101.74288940429688, | |
| "loss": 0.3712, | |
| "rewards/accuracies": 0.8250000476837158, | |
| "rewards/chosen": 0.2722548544406891, | |
| "rewards/margins": 1.9117896556854248, | |
| "rewards/rejected": -1.6395349502563477, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.65649294558019, | |
| "grad_norm": 12.683929443359375, | |
| "learning_rate": 3.1973753211805593e-07, | |
| "logits/chosen": -0.7631603479385376, | |
| "logits/rejected": -0.7429041862487793, | |
| "logps/chosen": -118.06796264648438, | |
| "logps/rejected": -101.36824035644531, | |
| "loss": 0.346, | |
| "rewards/accuracies": 0.8718750476837158, | |
| "rewards/chosen": 0.1378428041934967, | |
| "rewards/margins": 1.9998619556427002, | |
| "rewards/rejected": -1.8620191812515259, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.6622516556291391, | |
| "grad_norm": 15.436881065368652, | |
| "learning_rate": 3.1040055331980573e-07, | |
| "logits/chosen": -0.7278124690055847, | |
| "logits/rejected": -0.7010136842727661, | |
| "logps/chosen": -131.94236755371094, | |
| "logps/rejected": -107.63276672363281, | |
| "loss": 0.3427, | |
| "rewards/accuracies": 0.871874988079071, | |
| "rewards/chosen": 0.30168789625167847, | |
| "rewards/margins": 1.9916753768920898, | |
| "rewards/rejected": -1.6899876594543457, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6680103656780881, | |
| "grad_norm": 16.586223602294922, | |
| "learning_rate": 3.0114017019406355e-07, | |
| "logits/chosen": -0.7338711023330688, | |
| "logits/rejected": -0.7031553387641907, | |
| "logps/chosen": -135.208984375, | |
| "logps/rejected": -95.02485656738281, | |
| "loss": 0.3435, | |
| "rewards/accuracies": 0.8687500357627869, | |
| "rewards/chosen": 0.28808894753456116, | |
| "rewards/margins": 1.994765043258667, | |
| "rewards/rejected": -1.7066762447357178, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.6737690757270371, | |
| "grad_norm": 15.553061485290527, | |
| "learning_rate": 2.9196012381331447e-07, | |
| "logits/chosen": -0.7527965307235718, | |
| "logits/rejected": -0.7202584147453308, | |
| "logps/chosen": -138.75991821289062, | |
| "logps/rejected": -97.48345947265625, | |
| "loss": 0.3684, | |
| "rewards/accuracies": 0.8687500357627869, | |
| "rewards/chosen": 0.2913173735141754, | |
| "rewards/margins": 2.0032336711883545, | |
| "rewards/rejected": -1.711916208267212, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6795277857759862, | |
| "grad_norm": 17.600046157836914, | |
| "learning_rate": 2.8286412279506e-07, | |
| "logits/chosen": -0.7335983514785767, | |
| "logits/rejected": -0.709388792514801, | |
| "logps/chosen": -194.87017822265625, | |
| "logps/rejected": -102.54981231689453, | |
| "loss": 0.345, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": 1.1104973554611206, | |
| "rewards/margins": 2.8248836994171143, | |
| "rewards/rejected": -1.7143861055374146, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6852864958249352, | |
| "grad_norm": 16.01340103149414, | |
| "learning_rate": 2.7385584180358454e-07, | |
| "logits/chosen": -0.7293104529380798, | |
| "logits/rejected": -0.7005465626716614, | |
| "logps/chosen": -125.95570373535156, | |
| "logps/rejected": -99.11387634277344, | |
| "loss": 0.2968, | |
| "rewards/accuracies": 0.878125011920929, | |
| "rewards/chosen": 0.33182141184806824, | |
| "rewards/margins": 2.125437021255493, | |
| "rewards/rejected": -1.793615460395813, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6910452058738843, | |
| "grad_norm": 18.85894203186035, | |
| "learning_rate": 2.6493892006544117e-07, | |
| "logits/chosen": -0.743747889995575, | |
| "logits/rejected": -0.7257972955703735, | |
| "logps/chosen": -144.69972229003906, | |
| "logps/rejected": -113.71995544433594, | |
| "loss": 0.354, | |
| "rewards/accuracies": 0.8437500596046448, | |
| "rewards/chosen": 0.2043149769306183, | |
| "rewards/margins": 1.9560476541519165, | |
| "rewards/rejected": -1.7517324686050415, | |
| "step": 1200 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1737, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 400, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 653223570964480.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |