| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.0416666666666667e-07, | |
| "logps/chosen": -46.291481018066406, | |
| "logps/rejected": -63.38591384887695, | |
| "loss": 0.6933, | |
| "losses/dpo": 0.6924217939376831, | |
| "losses/sft": 1.2695705890655518, | |
| "losses/total": 0.6924217939376831, | |
| "ref_logps/chosen": -46.2892951965332, | |
| "ref_logps/rejected": -63.38469314575195, | |
| "rewards/accuracies": 0.46150001883506775, | |
| "rewards/chosen": -0.00021846062736585736, | |
| "rewards/margins": -9.587412932887673e-05, | |
| "rewards/rejected": -0.00012258654169272631, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.0833333333333333e-07, | |
| "logps/chosen": -44.6225471496582, | |
| "logps/rejected": -59.81039047241211, | |
| "loss": 0.6932, | |
| "losses/dpo": 0.6935679912567139, | |
| "losses/sft": 1.2453413009643555, | |
| "losses/total": 0.6935679912567139, | |
| "ref_logps/chosen": -44.617828369140625, | |
| "ref_logps/rejected": -59.8062744140625, | |
| "rewards/accuracies": 0.503000020980835, | |
| "rewards/chosen": -0.00047206657472997904, | |
| "rewards/margins": -6.079748345655389e-05, | |
| "rewards/rejected": -0.00041126925498247147, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.1249999999999997e-07, | |
| "logps/chosen": -43.78951644897461, | |
| "logps/rejected": -59.19944381713867, | |
| "loss": 0.6921, | |
| "losses/dpo": 0.6920485496520996, | |
| "losses/sft": 1.2684202194213867, | |
| "losses/total": 0.6920485496520996, | |
| "ref_logps/chosen": -43.78016662597656, | |
| "ref_logps/rejected": -59.168540954589844, | |
| "rewards/accuracies": 0.5205000042915344, | |
| "rewards/chosen": -0.0009349272586405277, | |
| "rewards/margins": 0.00215632701292634, | |
| "rewards/rejected": -0.003091254271566868, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "logps/chosen": -44.4240837097168, | |
| "logps/rejected": -58.47013854980469, | |
| "loss": 0.6901, | |
| "losses/dpo": 0.6897304654121399, | |
| "losses/sft": 1.2445145845413208, | |
| "losses/total": 0.6897304654121399, | |
| "ref_logps/chosen": -44.39784622192383, | |
| "ref_logps/rejected": -58.38094711303711, | |
| "rewards/accuracies": 0.5600000619888306, | |
| "rewards/chosen": -0.0026234728284180164, | |
| "rewards/margins": 0.0062958355993032455, | |
| "rewards/rejected": -0.0089193070307374, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.976851851851852e-07, | |
| "logps/chosen": -44.144630432128906, | |
| "logps/rejected": -60.08927917480469, | |
| "loss": 0.6856, | |
| "losses/dpo": 0.6853728294372559, | |
| "losses/sft": 1.209934115409851, | |
| "losses/total": 0.6853728294372559, | |
| "ref_logps/chosen": -44.078399658203125, | |
| "ref_logps/rejected": -59.86705017089844, | |
| "rewards/accuracies": 0.5929999947547913, | |
| "rewards/chosen": -0.006622872781008482, | |
| "rewards/margins": 0.015600004233419895, | |
| "rewards/rejected": -0.02222287654876709, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.861111111111111e-07, | |
| "logps/chosen": -45.84950256347656, | |
| "logps/rejected": -62.66447830200195, | |
| "loss": 0.6775, | |
| "losses/dpo": 0.6779772043228149, | |
| "losses/sft": 1.281442403793335, | |
| "losses/total": 0.6779772043228149, | |
| "ref_logps/chosen": -45.6851921081543, | |
| "ref_logps/rejected": -62.169559478759766, | |
| "rewards/accuracies": 0.6209999918937683, | |
| "rewards/chosen": -0.01643071323633194, | |
| "rewards/margins": 0.033061932772397995, | |
| "rewards/rejected": -0.049492646008729935, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.74537037037037e-07, | |
| "logps/chosen": -44.53513717651367, | |
| "logps/rejected": -61.487178802490234, | |
| "loss": 0.6684, | |
| "losses/dpo": 0.6683259010314941, | |
| "losses/sft": 1.2735780477523804, | |
| "losses/total": 0.6683259010314941, | |
| "ref_logps/chosen": -44.296630859375, | |
| "ref_logps/rejected": -60.71174240112305, | |
| "rewards/accuracies": 0.6439999938011169, | |
| "rewards/chosen": -0.02384989708662033, | |
| "rewards/margins": 0.053693462163209915, | |
| "rewards/rejected": -0.07754335552453995, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.6296296296296297e-07, | |
| "logps/chosen": -46.467018127441406, | |
| "logps/rejected": -63.491580963134766, | |
| "loss": 0.6563, | |
| "losses/dpo": 0.6521183252334595, | |
| "losses/sft": 1.2733054161071777, | |
| "losses/total": 0.6521183252334595, | |
| "ref_logps/chosen": -46.05796432495117, | |
| "ref_logps/rejected": -62.249488830566406, | |
| "rewards/accuracies": 0.6285000443458557, | |
| "rewards/chosen": -0.040905579924583435, | |
| "rewards/margins": 0.0833037793636322, | |
| "rewards/rejected": -0.12420935928821564, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.513888888888889e-07, | |
| "logps/chosen": -46.178504943847656, | |
| "logps/rejected": -64.19964599609375, | |
| "loss": 0.6429, | |
| "losses/dpo": 0.6435222029685974, | |
| "losses/sft": 1.2407046556472778, | |
| "losses/total": 0.6435222029685974, | |
| "ref_logps/chosen": -45.60074234008789, | |
| "ref_logps/rejected": -62.432167053222656, | |
| "rewards/accuracies": 0.6385000348091125, | |
| "rewards/chosen": -0.057775672525167465, | |
| "rewards/margins": 0.11897158622741699, | |
| "rewards/rejected": -0.17674726247787476, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.398148148148148e-07, | |
| "logps/chosen": -46.30465316772461, | |
| "logps/rejected": -62.2493782043457, | |
| "loss": 0.6363, | |
| "losses/dpo": 0.634566605091095, | |
| "losses/sft": 1.2551387548446655, | |
| "losses/total": 0.634566605091095, | |
| "ref_logps/chosen": -45.55400085449219, | |
| "ref_logps/rejected": -60.08576583862305, | |
| "rewards/accuracies": 0.6335000395774841, | |
| "rewards/chosen": -0.07506560534238815, | |
| "rewards/margins": 0.1412954032421112, | |
| "rewards/rejected": -0.21636100113391876, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.2824074074074075e-07, | |
| "logps/chosen": -45.26353073120117, | |
| "logps/rejected": -62.119075775146484, | |
| "loss": 0.6234, | |
| "losses/dpo": 0.6106441020965576, | |
| "losses/sft": 1.3094475269317627, | |
| "losses/total": 0.6106441020965576, | |
| "ref_logps/chosen": -44.300174713134766, | |
| "ref_logps/rejected": -59.338504791259766, | |
| "rewards/accuracies": 0.6434999704360962, | |
| "rewards/chosen": -0.09633561968803406, | |
| "rewards/margins": 0.1817215383052826, | |
| "rewards/rejected": -0.27805712819099426, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "logps/chosen": -45.76985168457031, | |
| "logps/rejected": -63.50913619995117, | |
| "loss": 0.613, | |
| "losses/dpo": 0.6014246940612793, | |
| "losses/sft": 1.2986456155776978, | |
| "losses/total": 0.6014246940612793, | |
| "ref_logps/chosen": -44.60796356201172, | |
| "ref_logps/rejected": -60.128543853759766, | |
| "rewards/accuracies": 0.6544999480247498, | |
| "rewards/chosen": -0.11618894338607788, | |
| "rewards/margins": 0.2218700349330902, | |
| "rewards/rejected": -0.3380589783191681, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.050925925925926e-07, | |
| "logps/chosen": -46.350311279296875, | |
| "logps/rejected": -64.47545623779297, | |
| "loss": 0.6045, | |
| "losses/dpo": 0.6078605055809021, | |
| "losses/sft": 1.2730615139007568, | |
| "losses/total": 0.6078605055809021, | |
| "ref_logps/chosen": -44.98001480102539, | |
| "ref_logps/rejected": -60.557586669921875, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": -0.13702912628650665, | |
| "rewards/margins": 0.25475841760635376, | |
| "rewards/rejected": -0.3917875587940216, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.9351851851851854e-07, | |
| "logps/chosen": -46.390987396240234, | |
| "logps/rejected": -65.16218566894531, | |
| "loss": 0.5944, | |
| "losses/dpo": 0.5973597168922424, | |
| "losses/sft": 1.3051469326019287, | |
| "losses/total": 0.5973597168922424, | |
| "ref_logps/chosen": -44.8856086730957, | |
| "ref_logps/rejected": -60.589107513427734, | |
| "rewards/accuracies": 0.6424999833106995, | |
| "rewards/chosen": -0.15053769946098328, | |
| "rewards/margins": 0.30677077174186707, | |
| "rewards/rejected": -0.45730850100517273, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.819444444444444e-07, | |
| "logps/chosen": -46.03995132446289, | |
| "logps/rejected": -63.268062591552734, | |
| "loss": 0.5952, | |
| "losses/dpo": 0.5992311239242554, | |
| "losses/sft": 1.2882429361343384, | |
| "losses/total": 0.5992311239242554, | |
| "ref_logps/chosen": -44.33562469482422, | |
| "ref_logps/rejected": -58.439823150634766, | |
| "rewards/accuracies": 0.6640000343322754, | |
| "rewards/chosen": -0.17043215036392212, | |
| "rewards/margins": 0.31239163875579834, | |
| "rewards/rejected": -0.48282381892204285, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.703703703703703e-07, | |
| "logps/chosen": -45.34758377075195, | |
| "logps/rejected": -63.79766082763672, | |
| "loss": 0.5798, | |
| "losses/dpo": 0.573971152305603, | |
| "losses/sft": 1.2241010665893555, | |
| "losses/total": 0.573971152305603, | |
| "ref_logps/chosen": -43.607730865478516, | |
| "ref_logps/rejected": -58.310462951660156, | |
| "rewards/accuracies": 0.6694999933242798, | |
| "rewards/chosen": -0.17398566007614136, | |
| "rewards/margins": 0.3747338056564331, | |
| "rewards/rejected": -0.5487195253372192, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.587962962962963e-07, | |
| "logps/chosen": -45.958282470703125, | |
| "logps/rejected": -63.922576904296875, | |
| "loss": 0.5783, | |
| "losses/dpo": 0.5742554068565369, | |
| "losses/sft": 1.2882492542266846, | |
| "losses/total": 0.5742554068565369, | |
| "ref_logps/chosen": -43.95150375366211, | |
| "ref_logps/rejected": -58.02607345581055, | |
| "rewards/accuracies": 0.6690000295639038, | |
| "rewards/chosen": -0.20067782700061798, | |
| "rewards/margins": 0.38897278904914856, | |
| "rewards/rejected": -0.5896506309509277, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.472222222222222e-07, | |
| "logps/chosen": -47.257179260253906, | |
| "logps/rejected": -68.16796875, | |
| "loss": 0.5616, | |
| "losses/dpo": 0.5516221523284912, | |
| "losses/sft": 1.2813299894332886, | |
| "losses/total": 0.5516221523284912, | |
| "ref_logps/chosen": -45.21623611450195, | |
| "ref_logps/rejected": -61.444461822509766, | |
| "rewards/accuracies": 0.6890000104904175, | |
| "rewards/chosen": -0.20409366488456726, | |
| "rewards/margins": 0.4682568609714508, | |
| "rewards/rejected": -0.6723506450653076, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.3564814814814815e-07, | |
| "logps/chosen": -46.08806610107422, | |
| "logps/rejected": -65.21028137207031, | |
| "loss": 0.5651, | |
| "losses/dpo": 0.554095983505249, | |
| "losses/sft": 1.3418306112289429, | |
| "losses/total": 0.554095983505249, | |
| "ref_logps/chosen": -43.922706604003906, | |
| "ref_logps/rejected": -58.401084899902344, | |
| "rewards/accuracies": 0.6819999814033508, | |
| "rewards/chosen": -0.21653667092323303, | |
| "rewards/margins": 0.4643844664096832, | |
| "rewards/rejected": -0.680921196937561, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.2407407407407406e-07, | |
| "logps/chosen": -46.008277893066406, | |
| "logps/rejected": -66.00773620605469, | |
| "loss": 0.5556, | |
| "losses/dpo": 0.5685967803001404, | |
| "losses/sft": 1.2652785778045654, | |
| "losses/total": 0.5685967803001404, | |
| "ref_logps/chosen": -43.85118103027344, | |
| "ref_logps/rejected": -58.737857818603516, | |
| "rewards/accuracies": 0.6944999694824219, | |
| "rewards/chosen": -0.21570871770381927, | |
| "rewards/margins": 0.5112798810005188, | |
| "rewards/rejected": -0.7269885540008545, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.1249999999999997e-07, | |
| "logps/chosen": -45.792293548583984, | |
| "logps/rejected": -68.81733703613281, | |
| "loss": 0.5479, | |
| "losses/dpo": 0.5653673410415649, | |
| "losses/sft": 1.2111891508102417, | |
| "losses/total": 0.5653673410415649, | |
| "ref_logps/chosen": -43.507266998291016, | |
| "ref_logps/rejected": -60.62786102294922, | |
| "rewards/accuracies": 0.6984999775886536, | |
| "rewards/chosen": -0.2285033017396927, | |
| "rewards/margins": 0.5904435515403748, | |
| "rewards/rejected": -0.8189470171928406, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.0092592592592594e-07, | |
| "logps/chosen": -48.979248046875, | |
| "logps/rejected": -70.8954849243164, | |
| "loss": 0.5332, | |
| "losses/dpo": 0.5159104466438293, | |
| "losses/sft": 1.4047646522521973, | |
| "losses/total": 0.5159104466438293, | |
| "ref_logps/chosen": -46.33369064331055, | |
| "ref_logps/rejected": -61.91999816894531, | |
| "rewards/accuracies": 0.7049999833106995, | |
| "rewards/chosen": -0.2645554840564728, | |
| "rewards/margins": 0.6329929828643799, | |
| "rewards/rejected": -0.897548496723175, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.8935185185185185e-07, | |
| "logps/chosen": -47.573795318603516, | |
| "logps/rejected": -71.3143081665039, | |
| "loss": 0.5276, | |
| "losses/dpo": 0.5273745656013489, | |
| "losses/sft": 1.329495906829834, | |
| "losses/total": 0.5273745656013489, | |
| "ref_logps/chosen": -44.873233795166016, | |
| "ref_logps/rejected": -61.62533187866211, | |
| "rewards/accuracies": 0.6990000009536743, | |
| "rewards/chosen": -0.27005600929260254, | |
| "rewards/margins": 0.698841392993927, | |
| "rewards/rejected": -0.9688975811004639, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.7777777777777776e-07, | |
| "logps/chosen": -48.737342834472656, | |
| "logps/rejected": -71.09253692626953, | |
| "loss": 0.5234, | |
| "losses/dpo": 0.5140664577484131, | |
| "losses/sft": 1.3185381889343262, | |
| "losses/total": 0.5140664577484131, | |
| "ref_logps/chosen": -45.823787689208984, | |
| "ref_logps/rejected": -60.519901275634766, | |
| "rewards/accuracies": 0.7139999866485596, | |
| "rewards/chosen": -0.2913552522659302, | |
| "rewards/margins": 0.7659080624580383, | |
| "rewards/rejected": -1.0572632551193237, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.662037037037037e-07, | |
| "logps/chosen": -46.89459991455078, | |
| "logps/rejected": -70.45913696289062, | |
| "loss": 0.5194, | |
| "losses/dpo": 0.5285363793373108, | |
| "losses/sft": 1.3135132789611816, | |
| "losses/total": 0.5285363793373108, | |
| "ref_logps/chosen": -44.250526428222656, | |
| "ref_logps/rejected": -59.6305046081543, | |
| "rewards/accuracies": 0.7170000672340393, | |
| "rewards/chosen": -0.2644067704677582, | |
| "rewards/margins": 0.8184568881988525, | |
| "rewards/rejected": -1.0828635692596436, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.5462962962962963e-07, | |
| "logps/chosen": -48.4859619140625, | |
| "logps/rejected": -71.74458312988281, | |
| "loss": 0.522, | |
| "losses/dpo": 0.5093265771865845, | |
| "losses/sft": 1.342267632484436, | |
| "losses/total": 0.5093265771865845, | |
| "ref_logps/chosen": -45.3779182434082, | |
| "ref_logps/rejected": -60.191993713378906, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.3108051121234894, | |
| "rewards/margins": 0.8444538712501526, | |
| "rewards/rejected": -1.1552588939666748, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.4305555555555555e-07, | |
| "logps/chosen": -47.598331451416016, | |
| "logps/rejected": -71.73502349853516, | |
| "loss": 0.5051, | |
| "losses/dpo": 0.5321754813194275, | |
| "losses/sft": 1.288547158241272, | |
| "losses/total": 0.5321754813194275, | |
| "ref_logps/chosen": -44.57024383544922, | |
| "ref_logps/rejected": -59.632904052734375, | |
| "rewards/accuracies": 0.734499990940094, | |
| "rewards/chosen": -0.3028090000152588, | |
| "rewards/margins": 0.9074033498764038, | |
| "rewards/rejected": -1.2102123498916626, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.3148148148148148e-07, | |
| "logps/chosen": -48.9836540222168, | |
| "logps/rejected": -75.99896240234375, | |
| "loss": 0.5039, | |
| "losses/dpo": 0.4898015558719635, | |
| "losses/sft": 1.3348312377929688, | |
| "losses/total": 0.4898015558719635, | |
| "ref_logps/chosen": -45.258949279785156, | |
| "ref_logps/rejected": -62.34097671508789, | |
| "rewards/accuracies": 0.7209999561309814, | |
| "rewards/chosen": -0.37246978282928467, | |
| "rewards/margins": 0.993329644203186, | |
| "rewards/rejected": -1.3657993078231812, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.199074074074074e-07, | |
| "logps/chosen": -49.37858581542969, | |
| "logps/rejected": -76.06786346435547, | |
| "loss": 0.5037, | |
| "losses/dpo": 0.4792703688144684, | |
| "losses/sft": 1.3326802253723145, | |
| "losses/total": 0.4792703688144684, | |
| "ref_logps/chosen": -45.24223709106445, | |
| "ref_logps/rejected": -61.8316764831543, | |
| "rewards/accuracies": 0.7134999632835388, | |
| "rewards/chosen": -0.4136350154876709, | |
| "rewards/margins": 1.0099844932556152, | |
| "rewards/rejected": -1.4236197471618652, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 2.0833333333333333e-07, | |
| "logps/chosen": -48.57896041870117, | |
| "logps/rejected": -71.33642578125, | |
| "loss": 0.511, | |
| "losses/dpo": 0.5458227396011353, | |
| "losses/sft": 1.2892143726348877, | |
| "losses/total": 0.5458227396011353, | |
| "ref_logps/chosen": -44.61341857910156, | |
| "ref_logps/rejected": -57.60921859741211, | |
| "rewards/accuracies": 0.7119999527931213, | |
| "rewards/chosen": -0.39655402302742004, | |
| "rewards/margins": 0.9761665463447571, | |
| "rewards/rejected": -1.3727205991744995, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.9675925925925927e-07, | |
| "logps/chosen": -48.83073425292969, | |
| "logps/rejected": -76.26995849609375, | |
| "loss": 0.4896, | |
| "losses/dpo": 0.4852687120437622, | |
| "losses/sft": 1.3753533363342285, | |
| "losses/total": 0.4852687120437622, | |
| "ref_logps/chosen": -44.98310089111328, | |
| "ref_logps/rejected": -61.261417388916016, | |
| "rewards/accuracies": 0.7349998950958252, | |
| "rewards/chosen": -0.38476306200027466, | |
| "rewards/margins": 1.1160913705825806, | |
| "rewards/rejected": -1.5008544921875, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8518518518518516e-07, | |
| "logps/chosen": -49.678104400634766, | |
| "logps/rejected": -76.78030395507812, | |
| "loss": 0.5058, | |
| "losses/dpo": 0.4918399751186371, | |
| "losses/sft": 1.373268485069275, | |
| "losses/total": 0.4918399751186371, | |
| "ref_logps/chosen": -45.21029281616211, | |
| "ref_logps/rejected": -61.68964767456055, | |
| "rewards/accuracies": 0.7149999737739563, | |
| "rewards/chosen": -0.44678181409835815, | |
| "rewards/margins": 1.0622824430465698, | |
| "rewards/rejected": -1.5090643167495728, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.736111111111111e-07, | |
| "logps/chosen": -49.76744079589844, | |
| "logps/rejected": -77.67528533935547, | |
| "loss": 0.4877, | |
| "losses/dpo": 0.47753456234931946, | |
| "losses/sft": 1.3492231369018555, | |
| "losses/total": 0.47753456234931946, | |
| "ref_logps/chosen": -45.20912170410156, | |
| "ref_logps/rejected": -61.64081954956055, | |
| "rewards/accuracies": 0.7359998822212219, | |
| "rewards/chosen": -0.45583218336105347, | |
| "rewards/margins": 1.1476140022277832, | |
| "rewards/rejected": -1.6034462451934814, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.6203703703703703e-07, | |
| "logps/chosen": -48.476280212402344, | |
| "logps/rejected": -74.0830078125, | |
| "loss": 0.4932, | |
| "losses/dpo": 0.4669117331504822, | |
| "losses/sft": 1.3778959512710571, | |
| "losses/total": 0.4669117331504822, | |
| "ref_logps/chosen": -44.10759735107422, | |
| "ref_logps/rejected": -58.72689437866211, | |
| "rewards/accuracies": 0.7190000414848328, | |
| "rewards/chosen": -0.43686822056770325, | |
| "rewards/margins": 1.098743200302124, | |
| "rewards/rejected": -1.5356113910675049, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.5046296296296297e-07, | |
| "logps/chosen": -49.38998031616211, | |
| "logps/rejected": -76.47419738769531, | |
| "loss": 0.5013, | |
| "losses/dpo": 0.4948745667934418, | |
| "losses/sft": 1.3688327074050903, | |
| "losses/total": 0.4948745667934418, | |
| "ref_logps/chosen": -44.37105941772461, | |
| "ref_logps/rejected": -60.47099304199219, | |
| "rewards/accuracies": 0.7259998917579651, | |
| "rewards/chosen": -0.5018922686576843, | |
| "rewards/margins": 1.0984277725219727, | |
| "rewards/rejected": -1.6003201007843018, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.3888888888888888e-07, | |
| "logps/chosen": -50.32158279418945, | |
| "logps/rejected": -77.23485565185547, | |
| "loss": 0.4968, | |
| "losses/dpo": 0.5039748549461365, | |
| "losses/sft": 1.3766621351242065, | |
| "losses/total": 0.5039748549461365, | |
| "ref_logps/chosen": -45.48880386352539, | |
| "ref_logps/rejected": -60.92485809326172, | |
| "rewards/accuracies": 0.7320000529289246, | |
| "rewards/chosen": -0.48327693343162537, | |
| "rewards/margins": 1.1477227210998535, | |
| "rewards/rejected": -1.6309998035430908, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.2731481481481482e-07, | |
| "logps/chosen": -49.106754302978516, | |
| "logps/rejected": -75.4212417602539, | |
| "loss": 0.5004, | |
| "losses/dpo": 0.465129017829895, | |
| "losses/sft": 1.396937370300293, | |
| "losses/total": 0.465129017829895, | |
| "ref_logps/chosen": -44.43598556518555, | |
| "ref_logps/rejected": -59.51651382446289, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": -0.4670768678188324, | |
| "rewards/margins": 1.123395562171936, | |
| "rewards/rejected": -1.5904724597930908, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.1574074074074074e-07, | |
| "logps/chosen": -49.315948486328125, | |
| "logps/rejected": -75.77275848388672, | |
| "loss": 0.4952, | |
| "losses/dpo": 0.48752278089523315, | |
| "losses/sft": 1.383622646331787, | |
| "losses/total": 0.48752278089523315, | |
| "ref_logps/chosen": -44.68310546875, | |
| "ref_logps/rejected": -59.506874084472656, | |
| "rewards/accuracies": 0.7290000915527344, | |
| "rewards/chosen": -0.4632847309112549, | |
| "rewards/margins": 1.1633038520812988, | |
| "rewards/rejected": -1.6265885829925537, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0416666666666667e-07, | |
| "logps/chosen": -49.365966796875, | |
| "logps/rejected": -76.18710327148438, | |
| "loss": 0.4945, | |
| "losses/dpo": 0.5000871419906616, | |
| "losses/sft": 1.345529317855835, | |
| "losses/total": 0.5000871419906616, | |
| "ref_logps/chosen": -44.74851989746094, | |
| "ref_logps/rejected": -60.123268127441406, | |
| "rewards/accuracies": 0.7280000448226929, | |
| "rewards/chosen": -0.46174487471580505, | |
| "rewards/margins": 1.1446377038955688, | |
| "rewards/rejected": -1.6063826084136963, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.259259259259258e-08, | |
| "logps/chosen": -49.790008544921875, | |
| "logps/rejected": -76.8461685180664, | |
| "loss": 0.4891, | |
| "losses/dpo": 0.5052517056465149, | |
| "losses/sft": 1.3480595350265503, | |
| "losses/total": 0.5052517056465149, | |
| "ref_logps/chosen": -44.7381477355957, | |
| "ref_logps/rejected": -60.120697021484375, | |
| "rewards/accuracies": 0.7300000786781311, | |
| "rewards/chosen": -0.505186140537262, | |
| "rewards/margins": 1.167360782623291, | |
| "rewards/rejected": -1.6725467443466187, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 8.101851851851852e-08, | |
| "logps/chosen": -48.60505676269531, | |
| "logps/rejected": -75.21623992919922, | |
| "loss": 0.4956, | |
| "losses/dpo": 0.4796887934207916, | |
| "losses/sft": 1.3435637950897217, | |
| "losses/total": 0.4796887934207916, | |
| "ref_logps/chosen": -43.89369583129883, | |
| "ref_logps/rejected": -59.03510665893555, | |
| "rewards/accuracies": 0.718000054359436, | |
| "rewards/chosen": -0.47113633155822754, | |
| "rewards/margins": 1.146977186203003, | |
| "rewards/rejected": -1.618113398551941, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.944444444444444e-08, | |
| "logps/chosen": -49.14812469482422, | |
| "logps/rejected": -77.22814178466797, | |
| "loss": 0.4816, | |
| "losses/dpo": 0.49271050095558167, | |
| "losses/sft": 1.3505686521530151, | |
| "losses/total": 0.49271050095558167, | |
| "ref_logps/chosen": -44.201717376708984, | |
| "ref_logps/rejected": -60.2335205078125, | |
| "rewards/accuracies": 0.7355000376701355, | |
| "rewards/chosen": -0.4946403503417969, | |
| "rewards/margins": 1.2048207521438599, | |
| "rewards/rejected": -1.6994611024856567, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.787037037037037e-08, | |
| "logps/chosen": -50.257972717285156, | |
| "logps/rejected": -79.50613403320312, | |
| "loss": 0.4888, | |
| "losses/dpo": 0.4835771918296814, | |
| "losses/sft": 1.399458408355713, | |
| "losses/total": 0.4835771918296814, | |
| "ref_logps/chosen": -44.78007888793945, | |
| "ref_logps/rejected": -61.73722839355469, | |
| "rewards/accuracies": 0.7374999523162842, | |
| "rewards/chosen": -0.5477903485298157, | |
| "rewards/margins": 1.2291010618209839, | |
| "rewards/rejected": -1.7768914699554443, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.629629629629629e-08, | |
| "logps/chosen": -50.44590759277344, | |
| "logps/rejected": -80.34747314453125, | |
| "loss": 0.4749, | |
| "losses/dpo": 0.4630358815193176, | |
| "losses/sft": 1.3918135166168213, | |
| "losses/total": 0.4630358815193176, | |
| "ref_logps/chosen": -45.468910217285156, | |
| "ref_logps/rejected": -62.39448547363281, | |
| "rewards/accuracies": 0.7430000305175781, | |
| "rewards/chosen": -0.497699499130249, | |
| "rewards/margins": 1.297598123550415, | |
| "rewards/rejected": -1.7952975034713745, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.472222222222222e-08, | |
| "logps/chosen": -50.564212799072266, | |
| "logps/rejected": -77.6652603149414, | |
| "loss": 0.4844, | |
| "losses/dpo": 0.4967005252838135, | |
| "losses/sft": 1.3921661376953125, | |
| "losses/total": 0.4967005252838135, | |
| "ref_logps/chosen": -45.257930755615234, | |
| "ref_logps/rejected": -59.98822784423828, | |
| "rewards/accuracies": 0.7325000166893005, | |
| "rewards/chosen": -0.5306286811828613, | |
| "rewards/margins": 1.237074613571167, | |
| "rewards/rejected": -1.7677034139633179, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.3148148148148144e-08, | |
| "logps/chosen": -49.5255126953125, | |
| "logps/rejected": -78.27824401855469, | |
| "loss": 0.4747, | |
| "losses/dpo": 0.48569077253341675, | |
| "losses/sft": 1.3604434728622437, | |
| "losses/total": 0.48569077253341675, | |
| "ref_logps/chosen": -44.655574798583984, | |
| "ref_logps/rejected": -60.496063232421875, | |
| "rewards/accuracies": 0.7485000491142273, | |
| "rewards/chosen": -0.48699355125427246, | |
| "rewards/margins": 1.2912240028381348, | |
| "rewards/rejected": -1.7782177925109863, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.1574074074074072e-08, | |
| "logps/chosen": -50.011077880859375, | |
| "logps/rejected": -75.67598724365234, | |
| "loss": 0.4876, | |
| "losses/dpo": 0.5032810568809509, | |
| "losses/sft": 1.366944432258606, | |
| "losses/total": 0.5032810568809509, | |
| "ref_logps/chosen": -44.80411148071289, | |
| "ref_logps/rejected": -58.415794372558594, | |
| "rewards/accuracies": 0.7414999604225159, | |
| "rewards/chosen": -0.5206969976425171, | |
| "rewards/margins": 1.2053215503692627, | |
| "rewards/rejected": -1.7260186672210693, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0, | |
| "logps/chosen": -51.40888595581055, | |
| "logps/rejected": -80.48424530029297, | |
| "loss": 0.469, | |
| "losses/dpo": 0.4639938771724701, | |
| "losses/sft": 1.3975021839141846, | |
| "losses/total": 0.4639938771724701, | |
| "ref_logps/chosen": -46.229427337646484, | |
| "ref_logps/rejected": -62.2370491027832, | |
| "rewards/accuracies": 0.7464999556541443, | |
| "rewards/chosen": -0.5179460644721985, | |
| "rewards/margins": 1.3067736625671387, | |
| "rewards/rejected": -1.8247196674346924, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1200, | |
| "total_flos": 0.0, | |
| "train_loss": 0.5544587286313375, | |
| "train_runtime": 33858.4559, | |
| "train_samples_per_second": 2.835, | |
| "train_steps_per_second": 0.035 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 1200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 24000, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |