| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.988679245283019, |
| "eval_steps": 500, |
| "global_step": 396, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "grad_norm": 6.326096763058934, |
| "learning_rate": 1.25e-08, |
| "logps/chosen": -36.02279281616211, |
| "logps/rejected": -41.85474395751953, |
| "loss": 0.6931, |
| "losses/dpo": 0.6931471824645996, |
| "losses/sft": 1.3949329853057861, |
| "losses/total": 0.6931471824645996, |
| "ref_logps/chosen": -36.02279281616211, |
| "ref_logps/rejected": -41.85474395751953, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 5.576784855809719, |
| "learning_rate": 2.5e-08, |
| "logps/chosen": -33.77919387817383, |
| "logps/rejected": -41.04405975341797, |
| "loss": 0.6931, |
| "losses/dpo": 0.6931471824645996, |
| "losses/sft": 1.3951497077941895, |
| "losses/total": 0.6931471824645996, |
| "ref_logps/chosen": -33.77919387817383, |
| "ref_logps/rejected": -41.04405975341797, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 6.263050301271669, |
| "learning_rate": 3.75e-08, |
| "logps/chosen": -38.8697509765625, |
| "logps/rejected": -48.85557556152344, |
| "loss": 0.6931, |
| "losses/dpo": 0.6860073804855347, |
| "losses/sft": 1.6376307010650635, |
| "losses/total": 0.6860073804855347, |
| "ref_logps/chosen": -38.87074279785156, |
| "ref_logps/rejected": -48.853511810302734, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": 9.899254655465484e-05, |
| "rewards/margins": 0.0003055855631828308, |
| "rewards/rejected": -0.0002065933949779719, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 5.738951829533344, |
| "learning_rate": 5e-08, |
| "logps/chosen": -36.64889144897461, |
| "logps/rejected": -42.698097229003906, |
| "loss": 0.6924, |
| "losses/dpo": 0.6935421228408813, |
| "losses/sft": 1.4897900819778442, |
| "losses/total": 0.6935421228408813, |
| "ref_logps/chosen": -36.668033599853516, |
| "ref_logps/rejected": -42.70002746582031, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": 0.0019142806995660067, |
| "rewards/margins": 0.001721527660265565, |
| "rewards/rejected": 0.00019275324302725494, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 6.637504299884417, |
| "learning_rate": 6.25e-08, |
| "logps/chosen": -41.41233825683594, |
| "logps/rejected": -47.04777145385742, |
| "loss": 0.6939, |
| "losses/dpo": 0.6956198215484619, |
| "losses/sft": 1.1974728107452393, |
| "losses/total": 0.6956198215484619, |
| "ref_logps/chosen": -41.40231704711914, |
| "ref_logps/rejected": -47.051856994628906, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": -0.0010022701462730765, |
| "rewards/margins": -0.0014111174969002604, |
| "rewards/rejected": 0.00040884732152335346, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 5.760428498194468, |
| "learning_rate": 7.5e-08, |
| "logps/chosen": -34.51856994628906, |
| "logps/rejected": -41.675804138183594, |
| "loss": 0.6946, |
| "losses/dpo": 0.6942625641822815, |
| "losses/sft": 1.3869932889938354, |
| "losses/total": 0.6942625641822815, |
| "ref_logps/chosen": -34.49778747558594, |
| "ref_logps/rejected": -41.68275451660156, |
| "rewards/accuracies": 0.421875, |
| "rewards/chosen": -0.0020779455080628395, |
| "rewards/margins": -0.002773313783109188, |
| "rewards/rejected": 0.0006953685078769922, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 5.773272972704967, |
| "learning_rate": 8.75e-08, |
| "logps/chosen": -36.6628303527832, |
| "logps/rejected": -42.856834411621094, |
| "loss": 0.6927, |
| "losses/dpo": 0.6944370269775391, |
| "losses/sft": 1.2695866823196411, |
| "losses/total": 0.6944370269775391, |
| "ref_logps/chosen": -36.669891357421875, |
| "ref_logps/rejected": -42.8528938293457, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.0007062811637297273, |
| "rewards/margins": 0.0011000875383615494, |
| "rewards/rejected": -0.00039380654925480485, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 6.837371123256996, |
| "learning_rate": 1e-07, |
| "logps/chosen": -41.66258239746094, |
| "logps/rejected": -43.34931182861328, |
| "loss": 0.6943, |
| "losses/dpo": 0.6919102668762207, |
| "losses/sft": 1.317617654800415, |
| "losses/total": 0.6919102668762207, |
| "ref_logps/chosen": -41.65662384033203, |
| "ref_logps/rejected": -43.36621856689453, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0005958047113381326, |
| "rewards/margins": -0.0022863391786813736, |
| "rewards/rejected": 0.0016905345255509019, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 6.116952924505858, |
| "learning_rate": 1.125e-07, |
| "logps/chosen": -37.05712890625, |
| "logps/rejected": -46.517696380615234, |
| "loss": 0.6917, |
| "losses/dpo": 0.692311704158783, |
| "losses/sft": 1.112796664237976, |
| "losses/total": 0.692311704158783, |
| "ref_logps/chosen": -37.07765197753906, |
| "ref_logps/rejected": -46.507904052734375, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": 0.0020526223815977573, |
| "rewards/margins": 0.003031900618225336, |
| "rewards/rejected": -0.0009792782366275787, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 5.761856386512759, |
| "learning_rate": 1.25e-07, |
| "logps/chosen": -33.799774169921875, |
| "logps/rejected": -41.23558044433594, |
| "loss": 0.6924, |
| "losses/dpo": 0.6941465139389038, |
| "losses/sft": 1.1185486316680908, |
| "losses/total": 0.6941465139389038, |
| "ref_logps/chosen": -33.81248474121094, |
| "ref_logps/rejected": -41.23206329345703, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": 0.0012711097951978445, |
| "rewards/margins": 0.0016234376234933734, |
| "rewards/rejected": -0.0003523279447108507, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 5.743856364003174, |
| "learning_rate": 1.375e-07, |
| "logps/chosen": -36.227317810058594, |
| "logps/rejected": -40.51737976074219, |
| "loss": 0.6927, |
| "losses/dpo": 0.6916883587837219, |
| "losses/sft": 1.4357692003250122, |
| "losses/total": 0.6916883587837219, |
| "ref_logps/chosen": -36.23785400390625, |
| "ref_logps/rejected": -40.51884078979492, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": 0.0010535644832998514, |
| "rewards/margins": 0.0009071138338185847, |
| "rewards/rejected": 0.0001464505330659449, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 5.9204140360711355, |
| "learning_rate": 1.5e-07, |
| "logps/chosen": -38.817134857177734, |
| "logps/rejected": -42.217681884765625, |
| "loss": 0.6925, |
| "losses/dpo": 0.6985405683517456, |
| "losses/sft": 1.4544084072113037, |
| "losses/total": 0.6985405683517456, |
| "ref_logps/chosen": -38.83327102661133, |
| "ref_logps/rejected": -42.220176696777344, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": 0.0016135365003719926, |
| "rewards/margins": 0.0013644276186823845, |
| "rewards/rejected": 0.0002491088816896081, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 6.190779724671626, |
| "learning_rate": 1.625e-07, |
| "logps/chosen": -37.33137512207031, |
| "logps/rejected": -46.71794128417969, |
| "loss": 0.6901, |
| "losses/dpo": 0.6943342685699463, |
| "losses/sft": 1.3390721082687378, |
| "losses/total": 0.6943342685699463, |
| "ref_logps/chosen": -37.34603500366211, |
| "ref_logps/rejected": -46.670997619628906, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": 0.0014661503955721855, |
| "rewards/margins": 0.006160825490951538, |
| "rewards/rejected": -0.004694675095379353, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 5.535961166630566, |
| "learning_rate": 1.75e-07, |
| "logps/chosen": -34.35616683959961, |
| "logps/rejected": -40.568878173828125, |
| "loss": 0.6923, |
| "losses/dpo": 0.6914072036743164, |
| "losses/sft": 1.0790843963623047, |
| "losses/total": 0.6914072036743164, |
| "ref_logps/chosen": -34.35405731201172, |
| "ref_logps/rejected": -40.548362731933594, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -0.00021077337441965938, |
| "rewards/margins": 0.001840681186877191, |
| "rewards/rejected": -0.002051454270258546, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 5.994324182587906, |
| "learning_rate": 1.875e-07, |
| "logps/chosen": -35.86518859863281, |
| "logps/rejected": -41.03656005859375, |
| "loss": 0.6932, |
| "losses/dpo": 0.690587043762207, |
| "losses/sft": 1.5208988189697266, |
| "losses/total": 0.690587043762207, |
| "ref_logps/chosen": -35.85986328125, |
| "ref_logps/rejected": -41.031028747558594, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.0005322899669408798, |
| "rewards/margins": 2.0701438188552856e-05, |
| "rewards/rejected": -0.00055299187079072, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 5.908538195984286, |
| "learning_rate": 2e-07, |
| "logps/chosen": -36.70806884765625, |
| "logps/rejected": -39.733882904052734, |
| "loss": 0.6926, |
| "losses/dpo": 0.6891317963600159, |
| "losses/sft": 1.1712957620620728, |
| "losses/total": 0.6891317963600159, |
| "ref_logps/chosen": -36.713016510009766, |
| "ref_logps/rejected": -39.726341247558594, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": 0.0004950040020048618, |
| "rewards/margins": 0.0012491128873080015, |
| "rewards/rejected": -0.0007541090017184615, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 6.17892190890487, |
| "learning_rate": 2.1249999999999998e-07, |
| "logps/chosen": -38.60041046142578, |
| "logps/rejected": -43.30579376220703, |
| "loss": 0.694, |
| "losses/dpo": 0.6916015148162842, |
| "losses/sft": 1.3250274658203125, |
| "losses/total": 0.6916015148162842, |
| "ref_logps/chosen": -38.579288482666016, |
| "ref_logps/rejected": -43.300140380859375, |
| "rewards/accuracies": 0.4609375, |
| "rewards/chosen": -0.0021122824400663376, |
| "rewards/margins": -0.001547331572510302, |
| "rewards/rejected": -0.0005649511003866792, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 6.020104058580606, |
| "learning_rate": 2.25e-07, |
| "logps/chosen": -37.50771713256836, |
| "logps/rejected": -41.76362609863281, |
| "loss": 0.6919, |
| "losses/dpo": 0.6925865411758423, |
| "losses/sft": 1.3761729001998901, |
| "losses/total": 0.6925865411758423, |
| "ref_logps/chosen": -37.507423400878906, |
| "ref_logps/rejected": -41.736366271972656, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -2.9282993637025356e-05, |
| "rewards/margins": 0.0026969274040311575, |
| "rewards/rejected": -0.002726210281252861, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 5.765433605501229, |
| "learning_rate": 2.3749999999999998e-07, |
| "logps/chosen": -32.96650695800781, |
| "logps/rejected": -42.942771911621094, |
| "loss": 0.6917, |
| "losses/dpo": 0.6924209594726562, |
| "losses/sft": 1.1748046875, |
| "losses/total": 0.6924209594726562, |
| "ref_logps/chosen": -32.971160888671875, |
| "ref_logps/rejected": -42.91703796386719, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": 0.00046532286796718836, |
| "rewards/margins": 0.0030381008982658386, |
| "rewards/rejected": -0.002572778146713972, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 5.824641204085951, |
| "learning_rate": 2.5e-07, |
| "logps/chosen": -37.962188720703125, |
| "logps/rejected": -43.213279724121094, |
| "loss": 0.6932, |
| "losses/dpo": 0.694710373878479, |
| "losses/sft": 1.2030720710754395, |
| "losses/total": 0.694710373878479, |
| "ref_logps/chosen": -37.919189453125, |
| "ref_logps/rejected": -43.171043395996094, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -0.0043003251776099205, |
| "rewards/margins": -7.65085278544575e-05, |
| "rewards/rejected": -0.004223817028105259, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 6.193915477797873, |
| "learning_rate": 2.625e-07, |
| "logps/chosen": -37.831146240234375, |
| "logps/rejected": -47.295005798339844, |
| "loss": 0.6954, |
| "losses/dpo": 0.6994068622589111, |
| "losses/sft": 1.2481111288070679, |
| "losses/total": 0.6994068622589111, |
| "ref_logps/chosen": -37.76181411743164, |
| "ref_logps/rejected": -47.269264221191406, |
| "rewards/accuracies": 0.4453125, |
| "rewards/chosen": -0.006933108903467655, |
| "rewards/margins": -0.004359052516520023, |
| "rewards/rejected": -0.002574056386947632, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 5.703247411276419, |
| "learning_rate": 2.75e-07, |
| "logps/chosen": -34.446189880371094, |
| "logps/rejected": -42.82508850097656, |
| "loss": 0.6939, |
| "losses/dpo": 0.6885063648223877, |
| "losses/sft": 1.2574893236160278, |
| "losses/total": 0.6885063648223877, |
| "ref_logps/chosen": -34.39026641845703, |
| "ref_logps/rejected": -42.782203674316406, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": -0.005592696368694305, |
| "rewards/margins": -0.0013045003870502114, |
| "rewards/rejected": -0.004288196098059416, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 5.586186575504996, |
| "learning_rate": 2.8749999999999995e-07, |
| "logps/chosen": -35.78218078613281, |
| "logps/rejected": -46.140350341796875, |
| "loss": 0.6929, |
| "losses/dpo": 0.6936126351356506, |
| "losses/sft": 1.4059488773345947, |
| "losses/total": 0.6936126351356506, |
| "ref_logps/chosen": -35.713783264160156, |
| "ref_logps/rejected": -46.065738677978516, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.00683995708823204, |
| "rewards/margins": 0.0006212468724697828, |
| "rewards/rejected": -0.00746120372787118, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 6.2807240194752, |
| "learning_rate": 3e-07, |
| "logps/chosen": -37.896644592285156, |
| "logps/rejected": -43.448909759521484, |
| "loss": 0.6914, |
| "losses/dpo": 0.6850650310516357, |
| "losses/sft": 1.4576250314712524, |
| "losses/total": 0.6850650310516357, |
| "ref_logps/chosen": -37.83003616333008, |
| "ref_logps/rejected": -43.34458923339844, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -0.00666093360632658, |
| "rewards/margins": 0.00377137353643775, |
| "rewards/rejected": -0.010432307608425617, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 5.6714251513252485, |
| "learning_rate": 3.1249999999999997e-07, |
| "logps/chosen": -36.5435791015625, |
| "logps/rejected": -41.46415710449219, |
| "loss": 0.6923, |
| "losses/dpo": 0.6902315020561218, |
| "losses/sft": 1.3371169567108154, |
| "losses/total": 0.6902315020561218, |
| "ref_logps/chosen": -36.474647521972656, |
| "ref_logps/rejected": -41.37662887573242, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -0.006893564946949482, |
| "rewards/margins": 0.001859544194303453, |
| "rewards/rejected": -0.00875310879200697, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 6.279222054280467, |
| "learning_rate": 3.25e-07, |
| "logps/chosen": -37.0484733581543, |
| "logps/rejected": -44.5318603515625, |
| "loss": 0.6919, |
| "losses/dpo": 0.6916804313659668, |
| "losses/sft": 1.2641081809997559, |
| "losses/total": 0.6916804313659668, |
| "ref_logps/chosen": -36.94280242919922, |
| "ref_logps/rejected": -44.40016174316406, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.010566946119070053, |
| "rewards/margins": 0.0026026167906820774, |
| "rewards/rejected": -0.013169562444090843, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 5.569722658657549, |
| "learning_rate": 3.375e-07, |
| "logps/chosen": -33.24622344970703, |
| "logps/rejected": -39.62266540527344, |
| "loss": 0.6918, |
| "losses/dpo": 0.6929441094398499, |
| "losses/sft": 1.0789260864257812, |
| "losses/total": 0.6929441094398499, |
| "ref_logps/chosen": -33.152587890625, |
| "ref_logps/rejected": -39.500396728515625, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.009363781660795212, |
| "rewards/margins": 0.002862950786948204, |
| "rewards/rejected": -0.012226731516420841, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 6.178096448266021, |
| "learning_rate": 3.5e-07, |
| "logps/chosen": -40.909210205078125, |
| "logps/rejected": -43.54678726196289, |
| "loss": 0.6932, |
| "losses/dpo": 0.6962201595306396, |
| "losses/sft": 1.4941421747207642, |
| "losses/total": 0.6962201595306396, |
| "ref_logps/chosen": -40.74734878540039, |
| "ref_logps/rejected": -43.38502883911133, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.016186244785785675, |
| "rewards/margins": -1.0382413165643811e-05, |
| "rewards/rejected": -0.01617586426436901, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 6.351428202937179, |
| "learning_rate": 3.6249999999999997e-07, |
| "logps/chosen": -38.18675231933594, |
| "logps/rejected": -45.641944885253906, |
| "loss": 0.6918, |
| "losses/dpo": 0.6936982870101929, |
| "losses/sft": 1.5615143775939941, |
| "losses/total": 0.6936982870101929, |
| "ref_logps/chosen": -38.07172775268555, |
| "ref_logps/rejected": -45.49729919433594, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -0.011502932757139206, |
| "rewards/margins": 0.002961072139441967, |
| "rewards/rejected": -0.014464004896581173, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 6.197352455942284, |
| "learning_rate": 3.75e-07, |
| "logps/chosen": -38.10821533203125, |
| "logps/rejected": -44.8267707824707, |
| "loss": 0.6913, |
| "losses/dpo": 0.6886826753616333, |
| "losses/sft": 1.2551246881484985, |
| "losses/total": 0.6886826753616333, |
| "ref_logps/chosen": -37.95869827270508, |
| "ref_logps/rejected": -44.637386322021484, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -0.014951780438423157, |
| "rewards/margins": 0.00398671068251133, |
| "rewards/rejected": -0.018938491120934486, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 6.109331160092928, |
| "learning_rate": 3.875e-07, |
| "logps/chosen": -37.73468017578125, |
| "logps/rejected": -45.03502655029297, |
| "loss": 0.6939, |
| "losses/dpo": 0.6942582130432129, |
| "losses/sft": 1.307703971862793, |
| "losses/total": 0.6942582130432129, |
| "ref_logps/chosen": -37.55635070800781, |
| "ref_logps/rejected": -44.870731353759766, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.01783285290002823, |
| "rewards/margins": -0.0014032268663868308, |
| "rewards/rejected": -0.01642962545156479, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 5.667604081077805, |
| "learning_rate": 4e-07, |
| "logps/chosen": -35.0442008972168, |
| "logps/rejected": -43.61913299560547, |
| "loss": 0.6936, |
| "losses/dpo": 0.7014378309249878, |
| "losses/sft": 1.3467621803283691, |
| "losses/total": 0.7014378309249878, |
| "ref_logps/chosen": -34.83974838256836, |
| "ref_logps/rejected": -43.42043685913086, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.02044512704014778, |
| "rewards/margins": -0.0005755843594670296, |
| "rewards/rejected": -0.019869543612003326, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 5.678502382891613, |
| "learning_rate": 4.1249999999999997e-07, |
| "logps/chosen": -32.68036651611328, |
| "logps/rejected": -37.5178337097168, |
| "loss": 0.6907, |
| "losses/dpo": 0.6907713413238525, |
| "losses/sft": 1.0900930166244507, |
| "losses/total": 0.6907713413238525, |
| "ref_logps/chosen": -32.45442199707031, |
| "ref_logps/rejected": -37.23982238769531, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -0.0225942712277174, |
| "rewards/margins": 0.005206821020692587, |
| "rewards/rejected": -0.027801092714071274, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 5.921573916861138, |
| "learning_rate": 4.2499999999999995e-07, |
| "logps/chosen": -38.70556640625, |
| "logps/rejected": -44.084251403808594, |
| "loss": 0.6917, |
| "losses/dpo": 0.6954081058502197, |
| "losses/sft": 1.3240528106689453, |
| "losses/total": 0.6954081058502197, |
| "ref_logps/chosen": -38.448394775390625, |
| "ref_logps/rejected": -43.795318603515625, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -0.025717251002788544, |
| "rewards/margins": 0.0031764586456120014, |
| "rewards/rejected": -0.02889370732009411, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 6.262461804418144, |
| "learning_rate": 4.375e-07, |
| "logps/chosen": -38.28644561767578, |
| "logps/rejected": -44.10706329345703, |
| "loss": 0.6906, |
| "losses/dpo": 0.6915950179100037, |
| "losses/sft": 1.5137853622436523, |
| "losses/total": 0.6915950179100037, |
| "ref_logps/chosen": -37.993186950683594, |
| "ref_logps/rejected": -43.75933837890625, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -0.029325801879167557, |
| "rewards/margins": 0.005446841474622488, |
| "rewards/rejected": -0.03477264195680618, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 6.51719678701152, |
| "learning_rate": 4.5e-07, |
| "logps/chosen": -38.827396392822266, |
| "logps/rejected": -44.608299255371094, |
| "loss": 0.6898, |
| "losses/dpo": 0.6939886212348938, |
| "losses/sft": 1.1804178953170776, |
| "losses/total": 0.6939886212348938, |
| "ref_logps/chosen": -38.524925231933594, |
| "ref_logps/rejected": -44.23436737060547, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.03024711087346077, |
| "rewards/margins": 0.007146051619201899, |
| "rewards/rejected": -0.03739316016435623, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 6.054369450491099, |
| "learning_rate": 4.625e-07, |
| "logps/chosen": -37.75469970703125, |
| "logps/rejected": -40.86686706542969, |
| "loss": 0.69, |
| "losses/dpo": 0.6901004910469055, |
| "losses/sft": 1.2039740085601807, |
| "losses/total": 0.6901004910469055, |
| "ref_logps/chosen": -37.38399124145508, |
| "ref_logps/rejected": -40.42985534667969, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.03707098215818405, |
| "rewards/margins": 0.006630584131926298, |
| "rewards/rejected": -0.04370156675577164, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 5.811128045517565, |
| "learning_rate": 4.7499999999999995e-07, |
| "logps/chosen": -37.347511291503906, |
| "logps/rejected": -42.597320556640625, |
| "loss": 0.6932, |
| "losses/dpo": 0.6947627663612366, |
| "losses/sft": 1.5172080993652344, |
| "losses/total": 0.6947627663612366, |
| "ref_logps/chosen": -36.92784881591797, |
| "ref_logps/rejected": -42.17408752441406, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -0.041966233402490616, |
| "rewards/margins": 0.00035727641079574823, |
| "rewards/rejected": -0.042323507368564606, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 6.232125527781941, |
| "learning_rate": 4.875e-07, |
| "logps/chosen": -35.76224899291992, |
| "logps/rejected": -40.480010986328125, |
| "loss": 0.6877, |
| "losses/dpo": 0.6863731741905212, |
| "losses/sft": 1.403287410736084, |
| "losses/total": 0.6863731741905212, |
| "ref_logps/chosen": -35.296600341796875, |
| "ref_logps/rejected": -39.899620056152344, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": -0.04656480997800827, |
| "rewards/margins": 0.011474234983325005, |
| "rewards/rejected": -0.058039046823978424, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 5.919014140290479, |
| "learning_rate": 5e-07, |
| "logps/chosen": -33.405452728271484, |
| "logps/rejected": -40.23749542236328, |
| "loss": 0.6917, |
| "losses/dpo": 0.7027544975280762, |
| "losses/sft": 1.5135366916656494, |
| "losses/total": 0.7027544975280762, |
| "ref_logps/chosen": -32.92824935913086, |
| "ref_logps/rejected": -39.72548294067383, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": -0.04772059991955757, |
| "rewards/margins": 0.0034805855248123407, |
| "rewards/rejected": -0.05120118334889412, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 6.156564024356789, |
| "learning_rate": 4.985955056179775e-07, |
| "logps/chosen": -33.48844528198242, |
| "logps/rejected": -40.55287551879883, |
| "loss": 0.6886, |
| "losses/dpo": 0.6869601011276245, |
| "losses/sft": 1.2104542255401611, |
| "losses/total": 0.6869601011276245, |
| "ref_logps/chosen": -32.960693359375, |
| "ref_logps/rejected": -39.92742919921875, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.052775099873542786, |
| "rewards/margins": 0.009769486263394356, |
| "rewards/rejected": -0.06254458427429199, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 5.9874098679402445, |
| "learning_rate": 4.97191011235955e-07, |
| "logps/chosen": -37.491756439208984, |
| "logps/rejected": -44.21824645996094, |
| "loss": 0.6903, |
| "losses/dpo": 0.6947405934333801, |
| "losses/sft": 1.5526431798934937, |
| "losses/total": 0.6947405934333801, |
| "ref_logps/chosen": -36.944496154785156, |
| "ref_logps/rejected": -43.608970642089844, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": -0.05472607538104057, |
| "rewards/margins": 0.006201753858476877, |
| "rewards/rejected": -0.060927826911211014, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 5.890690107516997, |
| "learning_rate": 4.957865168539325e-07, |
| "logps/chosen": -37.96784210205078, |
| "logps/rejected": -44.18370056152344, |
| "loss": 0.6911, |
| "losses/dpo": 0.6906970143318176, |
| "losses/sft": 1.5630677938461304, |
| "losses/total": 0.6906970143318176, |
| "ref_logps/chosen": -37.31348419189453, |
| "ref_logps/rejected": -43.480613708496094, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": -0.06543563306331635, |
| "rewards/margins": 0.0048727355897426605, |
| "rewards/rejected": -0.07030836492776871, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 5.6620046922389475, |
| "learning_rate": 4.943820224719101e-07, |
| "logps/chosen": -33.37147903442383, |
| "logps/rejected": -40.975284576416016, |
| "loss": 0.6876, |
| "losses/dpo": 0.6959986090660095, |
| "losses/sft": 1.4914252758026123, |
| "losses/total": 0.6959986090660095, |
| "ref_logps/chosen": -32.77292251586914, |
| "ref_logps/rejected": -40.25656509399414, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.05985547974705696, |
| "rewards/margins": 0.012016610242426395, |
| "rewards/rejected": -0.07187209278345108, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 5.975010174130114, |
| "learning_rate": 4.929775280898877e-07, |
| "logps/chosen": -36.01771545410156, |
| "logps/rejected": -40.14152145385742, |
| "loss": 0.686, |
| "losses/dpo": 0.6887790560722351, |
| "losses/sft": 1.0922722816467285, |
| "losses/total": 0.6887790560722351, |
| "ref_logps/chosen": -35.33580017089844, |
| "ref_logps/rejected": -39.30628204345703, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.06819140911102295, |
| "rewards/margins": 0.015332860872149467, |
| "rewards/rejected": -0.08352427184581757, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 5.941321729150391, |
| "learning_rate": 4.915730337078651e-07, |
| "logps/chosen": -37.23257064819336, |
| "logps/rejected": -43.88367462158203, |
| "loss": 0.6872, |
| "losses/dpo": 0.6943268775939941, |
| "losses/sft": 1.3947020769119263, |
| "losses/total": 0.6943268775939941, |
| "ref_logps/chosen": -36.47919845581055, |
| "ref_logps/rejected": -43.00157165527344, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.07533714175224304, |
| "rewards/margins": 0.012872692197561264, |
| "rewards/rejected": -0.08820983022451401, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 6.015115529321941, |
| "learning_rate": 4.901685393258427e-07, |
| "logps/chosen": -40.33525085449219, |
| "logps/rejected": -41.87712478637695, |
| "loss": 0.6904, |
| "losses/dpo": 0.6985194683074951, |
| "losses/sft": 1.4163267612457275, |
| "losses/total": 0.6985194683074951, |
| "ref_logps/chosen": -39.42625045776367, |
| "ref_logps/rejected": -40.897945404052734, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -0.09090035408735275, |
| "rewards/margins": 0.007017695810645819, |
| "rewards/rejected": -0.09791804850101471, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 5.733756968847646, |
| "learning_rate": 4.887640449438202e-07, |
| "logps/chosen": -36.97081756591797, |
| "logps/rejected": -42.80936050415039, |
| "loss": 0.6882, |
| "losses/dpo": 0.696724534034729, |
| "losses/sft": 1.2510000467300415, |
| "losses/total": 0.696724534034729, |
| "ref_logps/chosen": -36.11339569091797, |
| "ref_logps/rejected": -41.84064483642578, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.08574248850345612, |
| "rewards/margins": 0.011129248887300491, |
| "rewards/rejected": -0.09687173366546631, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 5.879327522594857, |
| "learning_rate": 4.873595505617978e-07, |
| "logps/chosen": -33.350772857666016, |
| "logps/rejected": -41.509918212890625, |
| "loss": 0.6838, |
| "losses/dpo": 0.6769124269485474, |
| "losses/sft": 1.218217372894287, |
| "losses/total": 0.6769124269485474, |
| "ref_logps/chosen": -32.432064056396484, |
| "ref_logps/rejected": -40.38986587524414, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.09187072515487671, |
| "rewards/margins": 0.020134272053837776, |
| "rewards/rejected": -0.11200500279664993, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 6.158412418626052, |
| "learning_rate": 4.859550561797752e-07, |
| "logps/chosen": -38.05060577392578, |
| "logps/rejected": -45.274757385253906, |
| "loss": 0.6828, |
| "losses/dpo": 0.6935728788375854, |
| "losses/sft": 1.33810293674469, |
| "losses/total": 0.6935728788375854, |
| "ref_logps/chosen": -36.988319396972656, |
| "ref_logps/rejected": -43.98942565917969, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.10622845590114594, |
| "rewards/margins": 0.022304760292172432, |
| "rewards/rejected": -0.12853321433067322, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 6.285183993622672, |
| "learning_rate": 4.845505617977528e-07, |
| "logps/chosen": -38.64442443847656, |
| "logps/rejected": -42.03549575805664, |
| "loss": 0.6823, |
| "losses/dpo": 0.6838083267211914, |
| "losses/sft": 1.414647102355957, |
| "losses/total": 0.6838083267211914, |
| "ref_logps/chosen": -37.58879089355469, |
| "ref_logps/rejected": -40.74079132080078, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.1055637076497078, |
| "rewards/margins": 0.023906776681542397, |
| "rewards/rejected": -0.12947048246860504, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 5.8676763170916155, |
| "learning_rate": 4.831460674157303e-07, |
| "logps/chosen": -35.28524398803711, |
| "logps/rejected": -43.29574966430664, |
| "loss": 0.6824, |
| "losses/dpo": 0.6761789321899414, |
| "losses/sft": 1.1140950918197632, |
| "losses/total": 0.6761789321899414, |
| "ref_logps/chosen": -34.05834197998047, |
| "ref_logps/rejected": -41.828880310058594, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -0.12269000709056854, |
| "rewards/margins": 0.023997044190764427, |
| "rewards/rejected": -0.14668706059455872, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 6.153262147929733, |
| "learning_rate": 4.817415730337078e-07, |
| "logps/chosen": -34.02470016479492, |
| "logps/rejected": -38.51059341430664, |
| "loss": 0.6808, |
| "losses/dpo": 0.6883823871612549, |
| "losses/sft": 1.1792895793914795, |
| "losses/total": 0.6883823871612549, |
| "ref_logps/chosen": -32.95384216308594, |
| "ref_logps/rejected": -37.16828155517578, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.10708627104759216, |
| "rewards/margins": 0.02714475244283676, |
| "rewards/rejected": -0.13423103094100952, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 5.930275284806721, |
| "learning_rate": 4.803370786516854e-07, |
| "logps/chosen": -39.90019989013672, |
| "logps/rejected": -41.967960357666016, |
| "loss": 0.6817, |
| "losses/dpo": 0.6764520406723022, |
| "losses/sft": 1.4552464485168457, |
| "losses/total": 0.6764520406723022, |
| "ref_logps/chosen": -38.590492248535156, |
| "ref_logps/rejected": -40.406002044677734, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.1309707909822464, |
| "rewards/margins": 0.025225069373846054, |
| "rewards/rejected": -0.15619586408138275, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 5.991852629106404, |
| "learning_rate": 4.789325842696629e-07, |
| "logps/chosen": -37.67607116699219, |
| "logps/rejected": -42.05184555053711, |
| "loss": 0.6906, |
| "losses/dpo": 0.6933637261390686, |
| "losses/sft": 1.3182023763656616, |
| "losses/total": 0.6933637261390686, |
| "ref_logps/chosen": -36.22807312011719, |
| "ref_logps/rejected": -40.539833068847656, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.14479960501194, |
| "rewards/margins": 0.006401616148650646, |
| "rewards/rejected": -0.15120121836662292, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 5.887970211048207, |
| "learning_rate": 4.775280898876405e-07, |
| "logps/chosen": -36.0313835144043, |
| "logps/rejected": -41.34480285644531, |
| "loss": 0.6806, |
| "losses/dpo": 0.6776463985443115, |
| "losses/sft": 1.3762413263320923, |
| "losses/total": 0.6776463985443115, |
| "ref_logps/chosen": -34.71417999267578, |
| "ref_logps/rejected": -39.74456024169922, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.131720170378685, |
| "rewards/margins": 0.02830405905842781, |
| "rewards/rejected": -0.1600242257118225, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 6.202121890129415, |
| "learning_rate": 4.7612359550561797e-07, |
| "logps/chosen": -39.138973236083984, |
| "logps/rejected": -44.62040710449219, |
| "loss": 0.6868, |
| "losses/dpo": 0.7154799699783325, |
| "losses/sft": 1.4311680793762207, |
| "losses/total": 0.7154799699783325, |
| "ref_logps/chosen": -37.48638153076172, |
| "ref_logps/rejected": -42.81147003173828, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.1652592122554779, |
| "rewards/margins": 0.01563437283039093, |
| "rewards/rejected": -0.18089357018470764, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 6.251681349620222, |
| "learning_rate": 4.747191011235955e-07, |
| "logps/chosen": -35.74232864379883, |
| "logps/rejected": -41.246910095214844, |
| "loss": 0.6762, |
| "losses/dpo": 0.6785226464271545, |
| "losses/sft": 1.2456488609313965, |
| "losses/total": 0.6785226464271545, |
| "ref_logps/chosen": -34.32046890258789, |
| "ref_logps/rejected": -39.45071792602539, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.14218537509441376, |
| "rewards/margins": 0.037433870136737823, |
| "rewards/rejected": -0.17961923778057098, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 6.186489597098538, |
| "learning_rate": 4.7331460674157303e-07, |
| "logps/chosen": -38.993804931640625, |
| "logps/rejected": -48.68840789794922, |
| "loss": 0.6799, |
| "losses/dpo": 0.6576354503631592, |
| "losses/sft": 1.2577842473983765, |
| "losses/total": 0.6576354503631592, |
| "ref_logps/chosen": -37.452022552490234, |
| "ref_logps/rejected": -46.83843994140625, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.15417808294296265, |
| "rewards/margins": 0.03081856295466423, |
| "rewards/rejected": -0.18499664962291718, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 6.176384076659114, |
| "learning_rate": 4.7191011235955054e-07, |
| "logps/chosen": -36.94293975830078, |
| "logps/rejected": -43.75997543334961, |
| "loss": 0.6818, |
| "losses/dpo": 0.6777645349502563, |
| "losses/sft": 1.4646830558776855, |
| "losses/total": 0.6777645349502563, |
| "ref_logps/chosen": -35.410682678222656, |
| "ref_logps/rejected": -41.96450424194336, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.15322577953338623, |
| "rewards/margins": 0.026321690529584885, |
| "rewards/rejected": -0.17954745888710022, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 6.222192772165732, |
| "learning_rate": 4.705056179775281e-07, |
| "logps/chosen": -38.04816436767578, |
| "logps/rejected": -46.636329650878906, |
| "loss": 0.6813, |
| "losses/dpo": 0.6915292739868164, |
| "losses/sft": 1.5139144659042358, |
| "losses/total": 0.6915292739868164, |
| "ref_logps/chosen": -36.346763610839844, |
| "ref_logps/rejected": -44.65103530883789, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.170139878988266, |
| "rewards/margins": 0.028389303013682365, |
| "rewards/rejected": -0.1985291838645935, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 6.581489224854424, |
| "learning_rate": 4.691011235955056e-07, |
| "logps/chosen": -39.37269973754883, |
| "logps/rejected": -42.562713623046875, |
| "loss": 0.6729, |
| "losses/dpo": 0.6604301333427429, |
| "losses/sft": 1.2340155839920044, |
| "losses/total": 0.6604301333427429, |
| "ref_logps/chosen": -37.8352165222168, |
| "ref_logps/rejected": -40.57086181640625, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.15374788641929626, |
| "rewards/margins": 0.04543708637356758, |
| "rewards/rejected": -0.19918496906757355, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 6.270954655004902, |
| "learning_rate": 4.6769662921348315e-07, |
| "logps/chosen": -35.57749938964844, |
| "logps/rejected": -43.94036102294922, |
| "loss": 0.6719, |
| "losses/dpo": 0.6689096689224243, |
| "losses/sft": 1.3980541229248047, |
| "losses/total": 0.6689096689224243, |
| "ref_logps/chosen": -33.870826721191406, |
| "ref_logps/rejected": -41.761024475097656, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.1706671416759491, |
| "rewards/margins": 0.0472659207880497, |
| "rewards/rejected": -0.2179330587387085, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 5.825145606750587, |
| "learning_rate": 4.662921348314606e-07, |
| "logps/chosen": -36.180145263671875, |
| "logps/rejected": -42.19972229003906, |
| "loss": 0.6867, |
| "losses/dpo": 0.6951602697372437, |
| "losses/sft": 1.4974910020828247, |
| "losses/total": 0.6951602697372437, |
| "ref_logps/chosen": -34.28754425048828, |
| "ref_logps/rejected": -40.13561248779297, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": -0.18926027417182922, |
| "rewards/margins": 0.017150741070508957, |
| "rewards/rejected": -0.20641100406646729, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 5.954970035091233, |
| "learning_rate": 4.6488764044943816e-07, |
| "logps/chosen": -41.472923278808594, |
| "logps/rejected": -45.73348617553711, |
| "loss": 0.6781, |
| "losses/dpo": 0.667303204536438, |
| "losses/sft": 1.494096040725708, |
| "losses/total": 0.667303204536438, |
| "ref_logps/chosen": -39.698944091796875, |
| "ref_logps/rejected": -43.6091423034668, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.177398219704628, |
| "rewards/margins": 0.03503631800413132, |
| "rewards/rejected": -0.2124345451593399, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 6.488401314246342, |
| "learning_rate": 4.634831460674157e-07, |
| "logps/chosen": -39.84260177612305, |
| "logps/rejected": -49.195159912109375, |
| "loss": 0.6716, |
| "losses/dpo": 0.6679590940475464, |
| "losses/sft": 1.3698948621749878, |
| "losses/total": 0.6679590940475464, |
| "ref_logps/chosen": -37.98674774169922, |
| "ref_logps/rejected": -46.86464309692383, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.18558543920516968, |
| "rewards/margins": 0.047466084361076355, |
| "rewards/rejected": -0.23305150866508484, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 6.139969478930884, |
| "learning_rate": 4.620786516853932e-07, |
| "logps/chosen": -36.54951858520508, |
| "logps/rejected": -42.6442756652832, |
| "loss": 0.6689, |
| "losses/dpo": 0.650477409362793, |
| "losses/sft": 1.350743055343628, |
| "losses/total": 0.650477409362793, |
| "ref_logps/chosen": -34.77081298828125, |
| "ref_logps/rejected": -40.319297790527344, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.17787054181098938, |
| "rewards/margins": 0.05462724715471268, |
| "rewards/rejected": -0.23249778151512146, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 6.626542224506714, |
| "learning_rate": 4.606741573033708e-07, |
| "logps/chosen": -38.910194396972656, |
| "logps/rejected": -44.71943664550781, |
| "loss": 0.6588, |
| "losses/dpo": 0.6475476622581482, |
| "losses/sft": 1.0136208534240723, |
| "losses/total": 0.6475476622581482, |
| "ref_logps/chosen": -37.08655548095703, |
| "ref_logps/rejected": -42.12825393676758, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.18236377835273743, |
| "rewards/margins": 0.0767548531293869, |
| "rewards/rejected": -0.2591186463832855, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 6.316526648907962, |
| "learning_rate": 4.592696629213483e-07, |
| "logps/chosen": -39.12900924682617, |
| "logps/rejected": -47.94546890258789, |
| "loss": 0.6741, |
| "losses/dpo": 0.6746849417686462, |
| "losses/sft": 1.3253227472305298, |
| "losses/total": 0.6746849417686462, |
| "ref_logps/chosen": -36.78126525878906, |
| "ref_logps/rejected": -45.148887634277344, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.23477408289909363, |
| "rewards/margins": 0.044883839786052704, |
| "rewards/rejected": -0.27965790033340454, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 6.029340644383451, |
| "learning_rate": 4.5786516853932584e-07, |
| "logps/chosen": -37.168025970458984, |
| "logps/rejected": -43.3531494140625, |
| "loss": 0.6776, |
| "losses/dpo": 0.708085298538208, |
| "losses/sft": 1.549338698387146, |
| "losses/total": 0.708085298538208, |
| "ref_logps/chosen": -34.82072067260742, |
| "ref_logps/rejected": -40.60224151611328, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.2347305417060852, |
| "rewards/margins": 0.04036000370979309, |
| "rewards/rejected": -0.2750905454158783, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 6.206457245959275, |
| "learning_rate": 4.5646067415730334e-07, |
| "logps/chosen": -37.381324768066406, |
| "logps/rejected": -44.06721878051758, |
| "loss": 0.667, |
| "losses/dpo": 0.6923149228096008, |
| "losses/sft": 1.499281883239746, |
| "losses/total": 0.6923149228096008, |
| "ref_logps/chosen": -35.004127502441406, |
| "ref_logps/rejected": -41.090354919433594, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.23771986365318298, |
| "rewards/margins": 0.05996667221188545, |
| "rewards/rejected": -0.29768651723861694, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 6.238705763349497, |
| "learning_rate": 4.550561797752809e-07, |
| "logps/chosen": -38.5302734375, |
| "logps/rejected": -48.384620666503906, |
| "loss": 0.6669, |
| "losses/dpo": 0.6794298887252808, |
| "losses/sft": 1.3331537246704102, |
| "losses/total": 0.6794298887252808, |
| "ref_logps/chosen": -35.968894958496094, |
| "ref_logps/rejected": -45.22618103027344, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.25613832473754883, |
| "rewards/margins": 0.05970541387796402, |
| "rewards/rejected": -0.31584370136260986, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 6.369104792363388, |
| "learning_rate": 4.536516853932584e-07, |
| "logps/chosen": -38.55243682861328, |
| "logps/rejected": -46.81627655029297, |
| "loss": 0.6609, |
| "losses/dpo": 0.6863117218017578, |
| "losses/sft": 1.404316782951355, |
| "losses/total": 0.6863117218017578, |
| "ref_logps/chosen": -35.96749496459961, |
| "ref_logps/rejected": -43.47722625732422, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.25849413871765137, |
| "rewards/margins": 0.07541059702634811, |
| "rewards/rejected": -0.3339047431945801, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 6.15339242747321, |
| "learning_rate": 4.522471910112359e-07, |
| "logps/chosen": -39.58115005493164, |
| "logps/rejected": -44.1653938293457, |
| "loss": 0.6782, |
| "losses/dpo": 0.7283678650856018, |
| "losses/sft": 1.3683419227600098, |
| "losses/total": 0.7283678650856018, |
| "ref_logps/chosen": -37.06593704223633, |
| "ref_logps/rejected": -41.25994873046875, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.25152140855789185, |
| "rewards/margins": 0.03902304172515869, |
| "rewards/rejected": -0.29054442048072815, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 6.3073523395391105, |
| "learning_rate": 4.5084269662921347e-07, |
| "logps/chosen": -39.416324615478516, |
| "logps/rejected": -45.20884323120117, |
| "loss": 0.6702, |
| "losses/dpo": 0.6884989738464355, |
| "losses/sft": 1.2989376783370972, |
| "losses/total": 0.6884989738464355, |
| "ref_logps/chosen": -36.72044372558594, |
| "ref_logps/rejected": -41.92823791503906, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.2695878744125366, |
| "rewards/margins": 0.058472514152526855, |
| "rewards/rejected": -0.3280603885650635, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 6.353149418167083, |
| "learning_rate": 4.4943820224719097e-07, |
| "logps/chosen": -39.451934814453125, |
| "logps/rejected": -45.58893585205078, |
| "loss": 0.6758, |
| "losses/dpo": 0.6700998544692993, |
| "losses/sft": 1.423154354095459, |
| "losses/total": 0.6700998544692993, |
| "ref_logps/chosen": -36.6813850402832, |
| "ref_logps/rejected": -42.35654830932617, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": -0.2770548164844513, |
| "rewards/margins": 0.0461842380464077, |
| "rewards/rejected": -0.3232390582561493, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 6.243464205666771, |
| "learning_rate": 4.4803370786516853e-07, |
| "logps/chosen": -38.50192642211914, |
| "logps/rejected": -44.94510269165039, |
| "loss": 0.6654, |
| "losses/dpo": 0.6559799909591675, |
| "losses/sft": 1.3645029067993164, |
| "losses/total": 0.6559799909591675, |
| "ref_logps/chosen": -35.83762741088867, |
| "ref_logps/rejected": -41.636741638183594, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.2664298415184021, |
| "rewards/margins": 0.06440602242946625, |
| "rewards/rejected": -0.33083584904670715, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 6.529746915602167, |
| "learning_rate": 4.4662921348314603e-07, |
| "logps/chosen": -38.366634368896484, |
| "logps/rejected": -48.25501251220703, |
| "loss": 0.6701, |
| "losses/dpo": 0.6894055008888245, |
| "losses/sft": 1.4073951244354248, |
| "losses/total": 0.6894055008888245, |
| "ref_logps/chosen": -35.34041976928711, |
| "ref_logps/rejected": -44.6351318359375, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.3026217818260193, |
| "rewards/margins": 0.05936632677912712, |
| "rewards/rejected": -0.3619880974292755, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 6.186041404774562, |
| "learning_rate": 4.452247191011236e-07, |
| "logps/chosen": -37.969024658203125, |
| "logps/rejected": -46.56663131713867, |
| "loss": 0.6582, |
| "losses/dpo": 0.6372844576835632, |
| "losses/sft": 1.1740036010742188, |
| "losses/total": 0.6372844576835632, |
| "ref_logps/chosen": -35.09920883178711, |
| "ref_logps/rejected": -42.874725341796875, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.2869817614555359, |
| "rewards/margins": 0.08220900595188141, |
| "rewards/rejected": -0.3691907525062561, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 6.41463598459145, |
| "learning_rate": 4.438202247191011e-07, |
| "logps/chosen": -43.05072021484375, |
| "logps/rejected": -49.178314208984375, |
| "loss": 0.656, |
| "losses/dpo": 0.6740515232086182, |
| "losses/sft": 1.4272187948226929, |
| "losses/total": 0.6740515232086182, |
| "ref_logps/chosen": -39.619014739990234, |
| "ref_logps/rejected": -44.83480453491211, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.3431706726551056, |
| "rewards/margins": 0.09118058532476425, |
| "rewards/rejected": -0.43435126543045044, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 6.840871211971457, |
| "learning_rate": 4.4241573033707865e-07, |
| "logps/chosen": -43.888370513916016, |
| "logps/rejected": -47.332916259765625, |
| "loss": 0.6729, |
| "losses/dpo": 0.666955828666687, |
| "losses/sft": 1.6874582767486572, |
| "losses/total": 0.666955828666687, |
| "ref_logps/chosen": -40.38330841064453, |
| "ref_logps/rejected": -43.3054084777832, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.350506067276001, |
| "rewards/margins": 0.05224461108446121, |
| "rewards/rejected": -0.4027506709098816, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 6.882624120223548, |
| "learning_rate": 4.410112359550562e-07, |
| "logps/chosen": -40.2237548828125, |
| "logps/rejected": -46.99496078491211, |
| "loss": 0.6607, |
| "losses/dpo": 0.6718687415122986, |
| "losses/sft": 1.5186784267425537, |
| "losses/total": 0.6718687415122986, |
| "ref_logps/chosen": -36.70365905761719, |
| "ref_logps/rejected": -42.63638687133789, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.3520098030567169, |
| "rewards/margins": 0.08384796977043152, |
| "rewards/rejected": -0.43585777282714844, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 6.3488191331703385, |
| "learning_rate": 4.3960674157303366e-07, |
| "logps/chosen": -40.38496780395508, |
| "logps/rejected": -46.7673454284668, |
| "loss": 0.6535, |
| "losses/dpo": 0.6566940546035767, |
| "losses/sft": 1.3071130514144897, |
| "losses/total": 0.6566940546035767, |
| "ref_logps/chosen": -37.20966339111328, |
| "ref_logps/rejected": -42.63634490966797, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.3175300061702728, |
| "rewards/margins": 0.09556981176137924, |
| "rewards/rejected": -0.4130997955799103, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 6.624302993852389, |
| "learning_rate": 4.382022471910112e-07, |
| "logps/chosen": -42.17374801635742, |
| "logps/rejected": -49.17514419555664, |
| "loss": 0.6571, |
| "losses/dpo": 0.6181658506393433, |
| "losses/sft": 1.3204035758972168, |
| "losses/total": 0.6181658506393433, |
| "ref_logps/chosen": -38.54387664794922, |
| "ref_logps/rejected": -44.659461975097656, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.3629874587059021, |
| "rewards/margins": 0.08858054131269455, |
| "rewards/rejected": -0.45156803727149963, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 6.569663592940448, |
| "learning_rate": 4.367977528089887e-07, |
| "logps/chosen": -39.99671936035156, |
| "logps/rejected": -48.49413299560547, |
| "loss": 0.6562, |
| "losses/dpo": 0.6639370322227478, |
| "losses/sft": 1.6048388481140137, |
| "losses/total": 0.6639370322227478, |
| "ref_logps/chosen": -36.648887634277344, |
| "ref_logps/rejected": -44.24271774291992, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.3347826600074768, |
| "rewards/margins": 0.09035841375589371, |
| "rewards/rejected": -0.4251410961151123, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 6.597580499931281, |
| "learning_rate": 4.353932584269663e-07, |
| "logps/chosen": -41.4986572265625, |
| "logps/rejected": -48.67082214355469, |
| "loss": 0.6519, |
| "losses/dpo": 0.6654509902000427, |
| "losses/sft": 1.462377905845642, |
| "losses/total": 0.6654509902000427, |
| "ref_logps/chosen": -37.87129211425781, |
| "ref_logps/rejected": -43.97351837158203, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -0.3627370595932007, |
| "rewards/margins": 0.10699345916509628, |
| "rewards/rejected": -0.46973055601119995, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 6.479183906632411, |
| "learning_rate": 4.339887640449438e-07, |
| "logps/chosen": -41.78961944580078, |
| "logps/rejected": -47.387901306152344, |
| "loss": 0.6791, |
| "losses/dpo": 0.6730961799621582, |
| "losses/sft": 1.1305738687515259, |
| "losses/total": 0.6730961799621582, |
| "ref_logps/chosen": -37.842323303222656, |
| "ref_logps/rejected": -42.93647766113281, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.3947296738624573, |
| "rewards/margins": 0.05041254311800003, |
| "rewards/rejected": -0.4451422691345215, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 6.926719176011086, |
| "learning_rate": 4.3258426966292134e-07, |
| "logps/chosen": -43.21299743652344, |
| "logps/rejected": -47.084434509277344, |
| "loss": 0.6673, |
| "losses/dpo": 0.6536482572555542, |
| "losses/sft": 1.2500860691070557, |
| "losses/total": 0.6536482572555542, |
| "ref_logps/chosen": -39.38795471191406, |
| "ref_logps/rejected": -42.582969665527344, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.38250401616096497, |
| "rewards/margins": 0.06764230877161026, |
| "rewards/rejected": -0.45014631748199463, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 6.563435223333862, |
| "learning_rate": 4.311797752808989e-07, |
| "logps/chosen": -40.25920104980469, |
| "logps/rejected": -49.489097595214844, |
| "loss": 0.6508, |
| "losses/dpo": 0.6591900587081909, |
| "losses/sft": 1.3429124355316162, |
| "losses/total": 0.6591900587081909, |
| "ref_logps/chosen": -36.316349029541016, |
| "ref_logps/rejected": -44.49497985839844, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.3942852020263672, |
| "rewards/margins": 0.1051262766122818, |
| "rewards/rejected": -0.4994114637374878, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 6.671599802331672, |
| "learning_rate": 4.297752808988764e-07, |
| "logps/chosen": -42.98493957519531, |
| "logps/rejected": -45.29029846191406, |
| "loss": 0.6775, |
| "losses/dpo": 0.6989056468009949, |
| "losses/sft": 1.7236398458480835, |
| "losses/total": 0.6989056468009949, |
| "ref_logps/chosen": -38.74510955810547, |
| "ref_logps/rejected": -40.55065155029297, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.4239833354949951, |
| "rewards/margins": 0.04998103156685829, |
| "rewards/rejected": -0.4739643633365631, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 6.921605107059482, |
| "learning_rate": 4.2837078651685396e-07, |
| "logps/chosen": -42.04779052734375, |
| "logps/rejected": -47.75447463989258, |
| "loss": 0.6577, |
| "losses/dpo": 0.6272084712982178, |
| "losses/sft": 1.5017703771591187, |
| "losses/total": 0.6272084712982178, |
| "ref_logps/chosen": -37.97248458862305, |
| "ref_logps/rejected": -42.7065315246582, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.40753045678138733, |
| "rewards/margins": 0.09726397693157196, |
| "rewards/rejected": -0.5047944784164429, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 6.759776662372792, |
| "learning_rate": 4.269662921348314e-07, |
| "logps/chosen": -44.38639831542969, |
| "logps/rejected": -53.21236038208008, |
| "loss": 0.6431, |
| "losses/dpo": 0.7165791988372803, |
| "losses/sft": 1.5609912872314453, |
| "losses/total": 0.7165791988372803, |
| "ref_logps/chosen": -39.95743942260742, |
| "ref_logps/rejected": -47.561588287353516, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -0.4428956210613251, |
| "rewards/margins": 0.122181735932827, |
| "rewards/rejected": -0.5650773644447327, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 6.767540666000382, |
| "learning_rate": 4.2556179775280896e-07, |
| "logps/chosen": -39.6769905090332, |
| "logps/rejected": -45.86317443847656, |
| "loss": 0.6772, |
| "losses/dpo": 0.6330491900444031, |
| "losses/sft": 1.33146333694458, |
| "losses/total": 0.6330491900444031, |
| "ref_logps/chosen": -35.39988708496094, |
| "ref_logps/rejected": -41.038116455078125, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.42771056294441223, |
| "rewards/margins": 0.05479476973414421, |
| "rewards/rejected": -0.4825053811073303, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 6.521925535129618, |
| "learning_rate": 4.2415730337078647e-07, |
| "logps/chosen": -43.78227996826172, |
| "logps/rejected": -47.82459259033203, |
| "loss": 0.6607, |
| "losses/dpo": 0.6985595226287842, |
| "losses/sft": 1.530924677848816, |
| "losses/total": 0.6985595226287842, |
| "ref_logps/chosen": -39.28633499145508, |
| "ref_logps/rejected": -42.36700439453125, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.4495944082736969, |
| "rewards/margins": 0.09616444259881973, |
| "rewards/rejected": -0.545758843421936, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 6.606025652021848, |
| "learning_rate": 4.22752808988764e-07, |
| "logps/chosen": -44.53917694091797, |
| "logps/rejected": -49.32555389404297, |
| "loss": 0.6783, |
| "losses/dpo": 0.6152039766311646, |
| "losses/sft": 1.5025076866149902, |
| "losses/total": 0.6152039766311646, |
| "ref_logps/chosen": -39.66822814941406, |
| "ref_logps/rejected": -43.90290451049805, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.4870951175689697, |
| "rewards/margins": 0.05516959726810455, |
| "rewards/rejected": -0.5422646999359131, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 6.456362063653043, |
| "learning_rate": 4.2134831460674153e-07, |
| "logps/chosen": -40.66051483154297, |
| "logps/rejected": -50.35266876220703, |
| "loss": 0.6339, |
| "losses/dpo": 0.5940225124359131, |
| "losses/sft": 1.3329205513000488, |
| "losses/total": 0.5940225124359131, |
| "ref_logps/chosen": -36.583961486816406, |
| "ref_logps/rejected": -44.78839874267578, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.40765535831451416, |
| "rewards/margins": 0.14877161383628845, |
| "rewards/rejected": -0.556427001953125, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 7.002732175851258, |
| "learning_rate": 4.199438202247191e-07, |
| "logps/chosen": -40.17961502075195, |
| "logps/rejected": -50.040138244628906, |
| "loss": 0.6253, |
| "losses/dpo": 0.597855806350708, |
| "losses/sft": 1.5503275394439697, |
| "losses/total": 0.597855806350708, |
| "ref_logps/chosen": -35.91657257080078, |
| "ref_logps/rejected": -44.05973815917969, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.426303893327713, |
| "rewards/margins": 0.17173629999160767, |
| "rewards/rejected": -0.5980401635169983, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 6.7690281568226345, |
| "learning_rate": 4.1853932584269664e-07, |
| "logps/chosen": -43.26731872558594, |
| "logps/rejected": -48.155426025390625, |
| "loss": 0.6528, |
| "losses/dpo": 0.6972070932388306, |
| "losses/sft": 1.3802154064178467, |
| "losses/total": 0.6972070932388306, |
| "ref_logps/chosen": -38.819725036621094, |
| "ref_logps/rejected": -42.62653350830078, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.4447590410709381, |
| "rewards/margins": 0.10813023149967194, |
| "rewards/rejected": -0.5528892278671265, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 6.184786618255584, |
| "learning_rate": 4.1713483146067415e-07, |
| "logps/chosen": -39.052734375, |
| "logps/rejected": -45.65272521972656, |
| "loss": 0.6289, |
| "losses/dpo": 0.5835955142974854, |
| "losses/sft": 1.2479004859924316, |
| "losses/total": 0.5835955142974854, |
| "ref_logps/chosen": -35.05288314819336, |
| "ref_logps/rejected": -40.07136154174805, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.39998501539230347, |
| "rewards/margins": 0.15815110504627228, |
| "rewards/rejected": -0.5581361055374146, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 6.925410262368385, |
| "learning_rate": 4.157303370786517e-07, |
| "logps/chosen": -44.249752044677734, |
| "logps/rejected": -44.935245513916016, |
| "loss": 0.6711, |
| "losses/dpo": 0.5667402744293213, |
| "losses/sft": 1.424223780632019, |
| "losses/total": 0.5667402744293213, |
| "ref_logps/chosen": -38.972206115722656, |
| "ref_logps/rejected": -38.912513732910156, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.5277543067932129, |
| "rewards/margins": 0.07451874017715454, |
| "rewards/rejected": -0.6022731065750122, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 6.586928303266985, |
| "learning_rate": 4.1432584269662915e-07, |
| "logps/chosen": -39.689693450927734, |
| "logps/rejected": -48.46234130859375, |
| "loss": 0.6509, |
| "losses/dpo": 0.6659662127494812, |
| "losses/sft": 1.3264880180358887, |
| "losses/total": 0.6659662127494812, |
| "ref_logps/chosen": -35.252235412597656, |
| "ref_logps/rejected": -42.87641525268555, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.4437457323074341, |
| "rewards/margins": 0.11484652757644653, |
| "rewards/rejected": -0.5585922598838806, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 6.103700351208487, |
| "learning_rate": 4.129213483146067e-07, |
| "logps/chosen": -38.51823425292969, |
| "logps/rejected": -43.52346420288086, |
| "loss": 0.6509, |
| "losses/dpo": 0.6539372205734253, |
| "losses/sft": 1.3750677108764648, |
| "losses/total": 0.6539372205734253, |
| "ref_logps/chosen": -34.18145751953125, |
| "ref_logps/rejected": -38.04938507080078, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.4336775541305542, |
| "rewards/margins": 0.11373014003038406, |
| "rewards/rejected": -0.5474076867103577, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 6.228970412657457, |
| "learning_rate": 4.115168539325842e-07, |
| "logps/chosen": -42.1187629699707, |
| "logps/rejected": -47.93737030029297, |
| "loss": 0.6451, |
| "losses/dpo": 0.646047055721283, |
| "losses/sft": 1.497565507888794, |
| "losses/total": 0.646047055721283, |
| "ref_logps/chosen": -37.47208786010742, |
| "ref_logps/rejected": -42.03216552734375, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.4646672010421753, |
| "rewards/margins": 0.12585340440273285, |
| "rewards/rejected": -0.5905206203460693, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 6.892504983818195, |
| "learning_rate": 4.1011235955056177e-07, |
| "logps/chosen": -42.86591339111328, |
| "logps/rejected": -48.31887435913086, |
| "loss": 0.6494, |
| "losses/dpo": 0.6817602515220642, |
| "losses/sft": 1.5651347637176514, |
| "losses/total": 0.6817602515220642, |
| "ref_logps/chosen": -37.89812088012695, |
| "ref_logps/rejected": -42.230018615722656, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.4967789351940155, |
| "rewards/margins": 0.11210669577121735, |
| "rewards/rejected": -0.608885645866394, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 6.344274461611194, |
| "learning_rate": 4.0870786516853933e-07, |
| "logps/chosen": -38.07393264770508, |
| "logps/rejected": -46.695167541503906, |
| "loss": 0.6485, |
| "losses/dpo": 0.6693782806396484, |
| "losses/sft": 1.4964573383331299, |
| "losses/total": 0.6693782806396484, |
| "ref_logps/chosen": -33.26963806152344, |
| "ref_logps/rejected": -40.731658935546875, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.48042935132980347, |
| "rewards/margins": 0.11592163890600204, |
| "rewards/rejected": -0.5963510274887085, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 6.340224167086584, |
| "learning_rate": 4.0730337078651683e-07, |
| "logps/chosen": -34.64811706542969, |
| "logps/rejected": -44.656005859375, |
| "loss": 0.6748, |
| "losses/dpo": 0.7200191020965576, |
| "losses/sft": 1.2917957305908203, |
| "losses/total": 0.7200191020965576, |
| "ref_logps/chosen": -29.991100311279297, |
| "ref_logps/rejected": -39.274078369140625, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.4657020568847656, |
| "rewards/margins": 0.07249079644680023, |
| "rewards/rejected": -0.5381928086280823, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 6.810734673228144, |
| "learning_rate": 4.058988764044944e-07, |
| "logps/chosen": -43.92599868774414, |
| "logps/rejected": -51.205841064453125, |
| "loss": 0.6618, |
| "losses/dpo": 0.7375708818435669, |
| "losses/sft": 1.6257060766220093, |
| "losses/total": 0.7375708818435669, |
| "ref_logps/chosen": -38.43563461303711, |
| "ref_logps/rejected": -44.785430908203125, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.5490366220474243, |
| "rewards/margins": 0.09300415217876434, |
| "rewards/rejected": -0.6420407295227051, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 6.646468325900178, |
| "learning_rate": 4.044943820224719e-07, |
| "logps/chosen": -41.907615661621094, |
| "logps/rejected": -47.17523956298828, |
| "loss": 0.6665, |
| "losses/dpo": 0.6535848379135132, |
| "losses/sft": 1.5487432479858398, |
| "losses/total": 0.6535848379135132, |
| "ref_logps/chosen": -36.636451721191406, |
| "ref_logps/rejected": -41.005767822265625, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.5271163582801819, |
| "rewards/margins": 0.08983068913221359, |
| "rewards/rejected": -0.6169470548629761, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 6.796059919133426, |
| "learning_rate": 4.0308988764044945e-07, |
| "logps/chosen": -43.95292663574219, |
| "logps/rejected": -48.59518814086914, |
| "loss": 0.6619, |
| "losses/dpo": 0.6743461489677429, |
| "losses/sft": 1.5721744298934937, |
| "losses/total": 0.6743461489677429, |
| "ref_logps/chosen": -38.58289337158203, |
| "ref_logps/rejected": -42.209529876708984, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.5370036363601685, |
| "rewards/margins": 0.10156210511922836, |
| "rewards/rejected": -0.6385657787322998, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 6.472585584476915, |
| "learning_rate": 4.0168539325842696e-07, |
| "logps/chosen": -40.216651916503906, |
| "logps/rejected": -45.985801696777344, |
| "loss": 0.6793, |
| "losses/dpo": 0.7015002965927124, |
| "losses/sft": 1.661520004272461, |
| "losses/total": 0.7015002965927124, |
| "ref_logps/chosen": -34.707008361816406, |
| "ref_logps/rejected": -39.90086364746094, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": -0.5509647130966187, |
| "rewards/margins": 0.05752916634082794, |
| "rewards/rejected": -0.6084938049316406, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 6.441943056630329, |
| "learning_rate": 4.0028089887640446e-07, |
| "logps/chosen": -40.84614944458008, |
| "logps/rejected": -49.78240966796875, |
| "loss": 0.6495, |
| "losses/dpo": 0.6826507449150085, |
| "losses/sft": 1.6292600631713867, |
| "losses/total": 0.6826507449150085, |
| "ref_logps/chosen": -35.30500411987305, |
| "ref_logps/rejected": -43.05198669433594, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.554114580154419, |
| "rewards/margins": 0.1189279854297638, |
| "rewards/rejected": -0.6730425953865051, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 6.739697461780844, |
| "learning_rate": 3.9887640449438196e-07, |
| "logps/chosen": -43.101287841796875, |
| "logps/rejected": -51.60324478149414, |
| "loss": 0.6365, |
| "losses/dpo": 0.617262601852417, |
| "losses/sft": 1.4229466915130615, |
| "losses/total": 0.617262601852417, |
| "ref_logps/chosen": -37.866512298583984, |
| "ref_logps/rejected": -44.80317687988281, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.52347731590271, |
| "rewards/margins": 0.1565295159816742, |
| "rewards/rejected": -0.6800068020820618, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 7.023593025692052, |
| "learning_rate": 3.974719101123595e-07, |
| "logps/chosen": -41.23580551147461, |
| "logps/rejected": -52.932403564453125, |
| "loss": 0.621, |
| "losses/dpo": 0.6078984141349792, |
| "losses/sft": 1.4510893821716309, |
| "losses/total": 0.6078984141349792, |
| "ref_logps/chosen": -36.275489807128906, |
| "ref_logps/rejected": -46.033939361572266, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.49603164196014404, |
| "rewards/margins": 0.19381484389305115, |
| "rewards/rejected": -0.6898465156555176, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 6.441040652132362, |
| "learning_rate": 3.960674157303371e-07, |
| "logps/chosen": -38.938751220703125, |
| "logps/rejected": -47.65938186645508, |
| "loss": 0.6436, |
| "losses/dpo": 0.6575403809547424, |
| "losses/sft": 1.4100581407546997, |
| "losses/total": 0.6575403809547424, |
| "ref_logps/chosen": -33.69282531738281, |
| "ref_logps/rejected": -41.06393814086914, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.5245928764343262, |
| "rewards/margins": 0.13495120406150818, |
| "rewards/rejected": -0.6595441102981567, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 6.831582112977574, |
| "learning_rate": 3.946629213483146e-07, |
| "logps/chosen": -41.802799224853516, |
| "logps/rejected": -49.96432876586914, |
| "loss": 0.6368, |
| "losses/dpo": 0.6237789392471313, |
| "losses/sft": 1.5177757740020752, |
| "losses/total": 0.6237789392471313, |
| "ref_logps/chosen": -36.240840911865234, |
| "ref_logps/rejected": -42.76991653442383, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.5561960935592651, |
| "rewards/margins": 0.16324520111083984, |
| "rewards/rejected": -0.719441294670105, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 7.410431235906546, |
| "learning_rate": 3.9325842696629214e-07, |
| "logps/chosen": -45.350669860839844, |
| "logps/rejected": -48.64668655395508, |
| "loss": 0.6848, |
| "losses/dpo": 0.746525228023529, |
| "losses/sft": 1.8295865058898926, |
| "losses/total": 0.746525228023529, |
| "ref_logps/chosen": -39.40565490722656, |
| "ref_logps/rejected": -42.114837646484375, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.5945014357566833, |
| "rewards/margins": 0.058684106916189194, |
| "rewards/rejected": -0.6531856060028076, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 6.7521875642568245, |
| "learning_rate": 3.9185393258426964e-07, |
| "logps/chosen": -42.558738708496094, |
| "logps/rejected": -48.05232238769531, |
| "loss": 0.6395, |
| "losses/dpo": 0.6771230697631836, |
| "losses/sft": 1.4978280067443848, |
| "losses/total": 0.6771230697631836, |
| "ref_logps/chosen": -37.11561584472656, |
| "ref_logps/rejected": -41.03240203857422, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -0.5443119406700134, |
| "rewards/margins": 0.1576804369688034, |
| "rewards/rejected": -0.701992392539978, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 6.852926877450363, |
| "learning_rate": 3.904494382022472e-07, |
| "logps/chosen": -43.11158752441406, |
| "logps/rejected": -50.49040985107422, |
| "loss": 0.6294, |
| "losses/dpo": 0.6048296689987183, |
| "losses/sft": 1.4263670444488525, |
| "losses/total": 0.6048296689987183, |
| "ref_logps/chosen": -37.525428771972656, |
| "ref_logps/rejected": -43.2536735534668, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.5586156845092773, |
| "rewards/margins": 0.16505761444568634, |
| "rewards/rejected": -0.7236733436584473, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 7.482804237101483, |
| "learning_rate": 3.890449438202247e-07, |
| "logps/chosen": -42.81207275390625, |
| "logps/rejected": -48.305213928222656, |
| "loss": 0.6723, |
| "losses/dpo": 0.6438789367675781, |
| "losses/sft": 1.3842287063598633, |
| "losses/total": 0.6438789367675781, |
| "ref_logps/chosen": -37.165802001953125, |
| "ref_logps/rejected": -41.8455696105957, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.5646266937255859, |
| "rewards/margins": 0.08133774995803833, |
| "rewards/rejected": -0.6459644436836243, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 6.612121590764, |
| "learning_rate": 3.876404494382022e-07, |
| "logps/chosen": -40.42414093017578, |
| "logps/rejected": -49.36077880859375, |
| "loss": 0.6449, |
| "losses/dpo": 0.6813696622848511, |
| "losses/sft": 1.6905653476715088, |
| "losses/total": 0.6813696622848511, |
| "ref_logps/chosen": -34.49721145629883, |
| "ref_logps/rejected": -42.062259674072266, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.5926928520202637, |
| "rewards/margins": 0.13715943694114685, |
| "rewards/rejected": -0.7298523187637329, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 6.529474424321076, |
| "learning_rate": 3.8623595505617977e-07, |
| "logps/chosen": -43.11798095703125, |
| "logps/rejected": -51.801422119140625, |
| "loss": 0.6396, |
| "losses/dpo": 0.6661785840988159, |
| "losses/sft": 1.4052226543426514, |
| "losses/total": 0.6661785840988159, |
| "ref_logps/chosen": -37.71819305419922, |
| "ref_logps/rejected": -44.816795349121094, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.5399786233901978, |
| "rewards/margins": 0.15848389267921448, |
| "rewards/rejected": -0.6984626054763794, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 6.759925517450978, |
| "learning_rate": 3.8483146067415727e-07, |
| "logps/chosen": -44.635955810546875, |
| "logps/rejected": -53.689002990722656, |
| "loss": 0.6172, |
| "losses/dpo": 0.5884010195732117, |
| "losses/sft": 1.7550606727600098, |
| "losses/total": 0.5884010195732117, |
| "ref_logps/chosen": -38.62323760986328, |
| "ref_logps/rejected": -45.53942108154297, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.6012718677520752, |
| "rewards/margins": 0.21368616819381714, |
| "rewards/rejected": -0.8149580359458923, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 6.5150883611431, |
| "learning_rate": 3.834269662921348e-07, |
| "logps/chosen": -41.34044647216797, |
| "logps/rejected": -50.817466735839844, |
| "loss": 0.6373, |
| "losses/dpo": 0.6241766214370728, |
| "losses/sft": 1.3161594867706299, |
| "losses/total": 0.6241766214370728, |
| "ref_logps/chosen": -35.719482421875, |
| "ref_logps/rejected": -43.593727111816406, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.562096118927002, |
| "rewards/margins": 0.16027754545211792, |
| "rewards/rejected": -0.7223736047744751, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 7.117780641865591, |
| "learning_rate": 3.8202247191011233e-07, |
| "logps/chosen": -40.98164367675781, |
| "logps/rejected": -46.440032958984375, |
| "loss": 0.6259, |
| "losses/dpo": 0.7275031805038452, |
| "losses/sft": 1.3312557935714722, |
| "losses/total": 0.7275031805038452, |
| "ref_logps/chosen": -36.18989181518555, |
| "ref_logps/rejected": -39.75829315185547, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.4791754186153412, |
| "rewards/margins": 0.1889985203742981, |
| "rewards/rejected": -0.6681739091873169, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 6.810891199884393, |
| "learning_rate": 3.806179775280899e-07, |
| "logps/chosen": -45.05056381225586, |
| "logps/rejected": -51.01411819458008, |
| "loss": 0.6422, |
| "losses/dpo": 0.5984268188476562, |
| "losses/sft": 1.6079349517822266, |
| "losses/total": 0.5984268188476562, |
| "ref_logps/chosen": -39.050132751464844, |
| "ref_logps/rejected": -43.51178741455078, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.6000430583953857, |
| "rewards/margins": 0.15018987655639648, |
| "rewards/rejected": -0.750232994556427, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 7.000300157233934, |
| "learning_rate": 3.792134831460674e-07, |
| "logps/chosen": -45.488685607910156, |
| "logps/rejected": -53.24082946777344, |
| "loss": 0.6293, |
| "losses/dpo": 0.6512585878372192, |
| "losses/sft": 1.70095694065094, |
| "losses/total": 0.6512585878372192, |
| "ref_logps/chosen": -39.463134765625, |
| "ref_logps/rejected": -45.5393180847168, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.602555513381958, |
| "rewards/margins": 0.16759565472602844, |
| "rewards/rejected": -0.7701511383056641, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 6.866553634275337, |
| "learning_rate": 3.7780898876404495e-07, |
| "logps/chosen": -46.352333068847656, |
| "logps/rejected": -50.944129943847656, |
| "loss": 0.6501, |
| "losses/dpo": 0.7261393070220947, |
| "losses/sft": 1.7794756889343262, |
| "losses/total": 0.7261393070220947, |
| "ref_logps/chosen": -40.64281463623047, |
| "ref_logps/rejected": -43.911109924316406, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.5709517002105713, |
| "rewards/margins": 0.13234999775886536, |
| "rewards/rejected": -0.7033016681671143, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 7.1746623953368385, |
| "learning_rate": 3.7640449438202245e-07, |
| "logps/chosen": -42.81266403198242, |
| "logps/rejected": -47.58647155761719, |
| "loss": 0.6484, |
| "losses/dpo": 0.6509548425674438, |
| "losses/sft": 1.4893585443496704, |
| "losses/total": 0.6509548425674438, |
| "ref_logps/chosen": -36.92261505126953, |
| "ref_logps/rejected": -40.24052429199219, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.5890049338340759, |
| "rewards/margins": 0.14558979868888855, |
| "rewards/rejected": -0.7345947623252869, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 6.639733312989805, |
| "learning_rate": 3.75e-07, |
| "logps/chosen": -42.699241638183594, |
| "logps/rejected": -49.38917922973633, |
| "loss": 0.6384, |
| "losses/dpo": 0.7398217916488647, |
| "losses/sft": 1.8203296661376953, |
| "losses/total": 0.7398217916488647, |
| "ref_logps/chosen": -36.63134002685547, |
| "ref_logps/rejected": -41.697608947753906, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.6067899465560913, |
| "rewards/margins": 0.16236720979213715, |
| "rewards/rejected": -0.7691571712493896, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 6.840872832592906, |
| "learning_rate": 3.735955056179775e-07, |
| "logps/chosen": -38.55268096923828, |
| "logps/rejected": -46.57276153564453, |
| "loss": 0.6455, |
| "losses/dpo": 0.6873192191123962, |
| "losses/sft": 1.3916716575622559, |
| "losses/total": 0.6873192191123962, |
| "ref_logps/chosen": -33.59415054321289, |
| "ref_logps/rejected": -40.15166091918945, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.4958529472351074, |
| "rewards/margins": 0.14625714719295502, |
| "rewards/rejected": -0.6421101093292236, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 7.001016051342946, |
| "learning_rate": 3.72191011235955e-07, |
| "logps/chosen": -42.6187629699707, |
| "logps/rejected": -47.61820983886719, |
| "loss": 0.6507, |
| "losses/dpo": 0.555785059928894, |
| "losses/sft": 1.6289881467819214, |
| "losses/total": 0.555785059928894, |
| "ref_logps/chosen": -36.840423583984375, |
| "ref_logps/rejected": -40.43097686767578, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.5778340101242065, |
| "rewards/margins": 0.14088886976242065, |
| "rewards/rejected": -0.7187228202819824, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 6.69674019742392, |
| "learning_rate": 3.707865168539326e-07, |
| "logps/chosen": -42.53615951538086, |
| "logps/rejected": -50.24591064453125, |
| "loss": 0.639, |
| "losses/dpo": 0.658089280128479, |
| "losses/sft": 1.6500622034072876, |
| "losses/total": 0.658089280128479, |
| "ref_logps/chosen": -36.53786087036133, |
| "ref_logps/rejected": -42.35614776611328, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.599829912185669, |
| "rewards/margins": 0.1891460418701172, |
| "rewards/rejected": -0.7889760136604309, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 6.641666012117393, |
| "learning_rate": 3.693820224719101e-07, |
| "logps/chosen": -40.87996292114258, |
| "logps/rejected": -51.007049560546875, |
| "loss": 0.6244, |
| "losses/dpo": 0.6069691181182861, |
| "losses/sft": 1.3164952993392944, |
| "losses/total": 0.6069691181182861, |
| "ref_logps/chosen": -35.29632568359375, |
| "ref_logps/rejected": -43.411521911621094, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.5583640933036804, |
| "rewards/margins": 0.20118848979473114, |
| "rewards/rejected": -0.7595525979995728, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 6.588946335066842, |
| "learning_rate": 3.6797752808988764e-07, |
| "logps/chosen": -42.399169921875, |
| "logps/rejected": -51.85491180419922, |
| "loss": 0.6191, |
| "losses/dpo": 0.5947903394699097, |
| "losses/sft": 1.4862775802612305, |
| "losses/total": 0.5947903394699097, |
| "ref_logps/chosen": -36.425140380859375, |
| "ref_logps/rejected": -43.687225341796875, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.5974029898643494, |
| "rewards/margins": 0.2193659394979477, |
| "rewards/rejected": -0.8167688846588135, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 6.690035072129239, |
| "learning_rate": 3.6657303370786514e-07, |
| "logps/chosen": -41.394432067871094, |
| "logps/rejected": -53.464168548583984, |
| "loss": 0.6, |
| "losses/dpo": 0.6503059267997742, |
| "losses/sft": 1.5431207418441772, |
| "losses/total": 0.6503059267997742, |
| "ref_logps/chosen": -35.42436218261719, |
| "ref_logps/rejected": -44.949180603027344, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.5970069766044617, |
| "rewards/margins": 0.25449231266975403, |
| "rewards/rejected": -0.8514993190765381, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 7.159913988692313, |
| "learning_rate": 3.651685393258427e-07, |
| "logps/chosen": -47.11418533325195, |
| "logps/rejected": -49.349937438964844, |
| "loss": 0.6642, |
| "losses/dpo": 0.70440673828125, |
| "losses/sft": 1.6172586679458618, |
| "losses/total": 0.70440673828125, |
| "ref_logps/chosen": -40.66786193847656, |
| "ref_logps/rejected": -41.70207977294922, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.6446323394775391, |
| "rewards/margins": 0.12015305459499359, |
| "rewards/rejected": -0.7647854685783386, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 6.861296978626527, |
| "learning_rate": 3.637640449438202e-07, |
| "logps/chosen": -41.73448181152344, |
| "logps/rejected": -49.953067779541016, |
| "loss": 0.6114, |
| "losses/dpo": 0.6396130323410034, |
| "losses/sft": 1.41642427444458, |
| "losses/total": 0.6396130323410034, |
| "ref_logps/chosen": -36.14557647705078, |
| "ref_logps/rejected": -42.22886657714844, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.5588902235031128, |
| "rewards/margins": 0.21353021264076233, |
| "rewards/rejected": -0.7724204063415527, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 6.8531162605456055, |
| "learning_rate": 3.6235955056179776e-07, |
| "logps/chosen": -40.27118682861328, |
| "logps/rejected": -51.58380889892578, |
| "loss": 0.6205, |
| "losses/dpo": 0.6927103400230408, |
| "losses/sft": 1.5446867942810059, |
| "losses/total": 0.6927103400230408, |
| "ref_logps/chosen": -34.374351501464844, |
| "ref_logps/rejected": -43.53186798095703, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.5896837115287781, |
| "rewards/margins": 0.2155105173587799, |
| "rewards/rejected": -0.8051942586898804, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 6.581792409395151, |
| "learning_rate": 3.6095505617977526e-07, |
| "logps/chosen": -40.182411193847656, |
| "logps/rejected": -50.48395538330078, |
| "loss": 0.6351, |
| "losses/dpo": 0.5907813906669617, |
| "losses/sft": 1.5051367282867432, |
| "losses/total": 0.5907813906669617, |
| "ref_logps/chosen": -34.705772399902344, |
| "ref_logps/rejected": -43.20262908935547, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.5476638078689575, |
| "rewards/margins": 0.18046864867210388, |
| "rewards/rejected": -0.728132426738739, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 7.099683859328159, |
| "learning_rate": 3.5955056179775277e-07, |
| "logps/chosen": -47.343101501464844, |
| "logps/rejected": -54.016380310058594, |
| "loss": 0.5962, |
| "losses/dpo": 0.5484156012535095, |
| "losses/sft": 1.3340450525283813, |
| "losses/total": 0.5484156012535095, |
| "ref_logps/chosen": -40.93060302734375, |
| "ref_logps/rejected": -45.07525634765625, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.6412495374679565, |
| "rewards/margins": 0.2528632581233978, |
| "rewards/rejected": -0.8941128253936768, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 6.6246858434595435, |
| "learning_rate": 3.581460674157303e-07, |
| "logps/chosen": -42.795745849609375, |
| "logps/rejected": -50.99128341674805, |
| "loss": 0.6105, |
| "losses/dpo": 0.6333677768707275, |
| "losses/sft": 1.7310549020767212, |
| "losses/total": 0.6333677768707275, |
| "ref_logps/chosen": -36.29172897338867, |
| "ref_logps/rejected": -42.22053527832031, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.6504020690917969, |
| "rewards/margins": 0.2266732007265091, |
| "rewards/rejected": -0.8770751953125, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 6.585988374877859, |
| "learning_rate": 3.5674157303370783e-07, |
| "logps/chosen": -39.767051696777344, |
| "logps/rejected": -45.62493896484375, |
| "loss": 0.6184, |
| "losses/dpo": 0.6107473373413086, |
| "losses/sft": 1.3934905529022217, |
| "losses/total": 0.6107473373413086, |
| "ref_logps/chosen": -34.30314636230469, |
| "ref_logps/rejected": -38.085365295410156, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.5463899374008179, |
| "rewards/margins": 0.20756718516349792, |
| "rewards/rejected": -0.7539570927619934, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 7.605347145021406, |
| "learning_rate": 3.553370786516854e-07, |
| "logps/chosen": -45.32689666748047, |
| "logps/rejected": -53.28538513183594, |
| "loss": 0.6396, |
| "losses/dpo": 0.6139785051345825, |
| "losses/sft": 1.4996390342712402, |
| "losses/total": 0.6139785051345825, |
| "ref_logps/chosen": -38.790870666503906, |
| "ref_logps/rejected": -44.86207580566406, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.6536027789115906, |
| "rewards/margins": 0.18872803449630737, |
| "rewards/rejected": -0.842330813407898, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 6.375447848895096, |
| "learning_rate": 3.539325842696629e-07, |
| "logps/chosen": -39.950706481933594, |
| "logps/rejected": -52.67605972290039, |
| "loss": 0.6352, |
| "losses/dpo": 0.562360405921936, |
| "losses/sft": 1.5273569822311401, |
| "losses/total": 0.562360405921936, |
| "ref_logps/chosen": -33.4902458190918, |
| "ref_logps/rejected": -44.185813903808594, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.6460464000701904, |
| "rewards/margins": 0.20297789573669434, |
| "rewards/rejected": -0.8490242958068848, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 6.570913856555609, |
| "learning_rate": 3.5252808988764045e-07, |
| "logps/chosen": -41.82993698120117, |
| "logps/rejected": -49.303138732910156, |
| "loss": 0.616, |
| "losses/dpo": 0.6345305442810059, |
| "losses/sft": 1.6690033674240112, |
| "losses/total": 0.6345305442810059, |
| "ref_logps/chosen": -36.14568328857422, |
| "ref_logps/rejected": -41.49669647216797, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.5684253573417664, |
| "rewards/margins": 0.2122190296649933, |
| "rewards/rejected": -0.7806443572044373, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 6.513567190172644, |
| "learning_rate": 3.51123595505618e-07, |
| "logps/chosen": -41.52465057373047, |
| "logps/rejected": -53.12078094482422, |
| "loss": 0.6021, |
| "losses/dpo": 0.6336867809295654, |
| "losses/sft": 1.6118431091308594, |
| "losses/total": 0.6336867809295654, |
| "ref_logps/chosen": -35.433265686035156, |
| "ref_logps/rejected": -44.33394241333008, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.6091387271881104, |
| "rewards/margins": 0.2695454955101013, |
| "rewards/rejected": -0.8786842823028564, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 7.159126675968495, |
| "learning_rate": 3.497191011235955e-07, |
| "logps/chosen": -45.985076904296875, |
| "logps/rejected": -56.15589904785156, |
| "loss": 0.6053, |
| "losses/dpo": 0.6400711536407471, |
| "losses/sft": 1.618746042251587, |
| "losses/total": 0.6400711536407471, |
| "ref_logps/chosen": -39.94523239135742, |
| "ref_logps/rejected": -47.24679946899414, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.6039848327636719, |
| "rewards/margins": 0.2869252562522888, |
| "rewards/rejected": -0.8909100294113159, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 6.31408614588678, |
| "learning_rate": 3.48314606741573e-07, |
| "logps/chosen": -36.47071075439453, |
| "logps/rejected": -44.49042510986328, |
| "loss": 0.6059, |
| "losses/dpo": 0.6014193892478943, |
| "losses/sft": 1.4461239576339722, |
| "losses/total": 0.6014193892478943, |
| "ref_logps/chosen": -31.124897003173828, |
| "ref_logps/rejected": -36.84246063232422, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.5345816016197205, |
| "rewards/margins": 0.23021462559700012, |
| "rewards/rejected": -0.7647961974143982, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 6.681653921230616, |
| "learning_rate": 3.469101123595505e-07, |
| "logps/chosen": -47.83122634887695, |
| "logps/rejected": -53.77499008178711, |
| "loss": 0.5731, |
| "losses/dpo": 0.5383450388908386, |
| "losses/sft": 1.5721720457077026, |
| "losses/total": 0.5383450388908386, |
| "ref_logps/chosen": -41.766536712646484, |
| "ref_logps/rejected": -44.522464752197266, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.6064690947532654, |
| "rewards/margins": 0.3187834620475769, |
| "rewards/rejected": -0.9252525568008423, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 6.904604135283266, |
| "learning_rate": 3.4550561797752807e-07, |
| "logps/chosen": -44.048912048339844, |
| "logps/rejected": -51.318206787109375, |
| "loss": 0.6352, |
| "losses/dpo": 0.7056742906570435, |
| "losses/sft": 1.706668496131897, |
| "losses/total": 0.7056742906570435, |
| "ref_logps/chosen": -37.56008529663086, |
| "ref_logps/rejected": -42.8933219909668, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.6488831043243408, |
| "rewards/margins": 0.19360551238059998, |
| "rewards/rejected": -0.8424886465072632, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.14, |
| "grad_norm": 6.64022622762387, |
| "learning_rate": 3.441011235955056e-07, |
| "logps/chosen": -43.570884704589844, |
| "logps/rejected": -52.240318298339844, |
| "loss": 0.593, |
| "losses/dpo": 0.6028671264648438, |
| "losses/sft": 1.4086356163024902, |
| "losses/total": 0.6028671264648438, |
| "ref_logps/chosen": -38.0629768371582, |
| "ref_logps/rejected": -43.7314453125, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.5507906079292297, |
| "rewards/margins": 0.3000965714454651, |
| "rewards/rejected": -0.8508871793746948, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 6.233323532005839, |
| "learning_rate": 3.4269662921348313e-07, |
| "logps/chosen": -37.21058654785156, |
| "logps/rejected": -48.0037841796875, |
| "loss": 0.594, |
| "losses/dpo": 0.614506721496582, |
| "losses/sft": 1.60889732837677, |
| "losses/total": 0.614506721496582, |
| "ref_logps/chosen": -31.845245361328125, |
| "ref_logps/rejected": -39.9993782043457, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.536533772945404, |
| "rewards/margins": 0.2639070749282837, |
| "rewards/rejected": -0.800440788269043, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 6.3687654295239335, |
| "learning_rate": 3.4129213483146064e-07, |
| "logps/chosen": -38.31336975097656, |
| "logps/rejected": -46.770484924316406, |
| "loss": 0.6353, |
| "losses/dpo": 0.67460036277771, |
| "losses/sft": 1.5722901821136475, |
| "losses/total": 0.67460036277771, |
| "ref_logps/chosen": -32.139488220214844, |
| "ref_logps/rejected": -38.692901611328125, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.617388129234314, |
| "rewards/margins": 0.1903703510761261, |
| "rewards/rejected": -0.8077584505081177, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 6.739574073940404, |
| "learning_rate": 3.398876404494382e-07, |
| "logps/chosen": -40.92176055908203, |
| "logps/rejected": -55.60083770751953, |
| "loss": 0.5768, |
| "losses/dpo": 0.6374070644378662, |
| "losses/sft": 1.6940921545028687, |
| "losses/total": 0.6374070644378662, |
| "ref_logps/chosen": -34.817481994628906, |
| "ref_logps/rejected": -46.21774673461914, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.6104279160499573, |
| "rewards/margins": 0.3278810977935791, |
| "rewards/rejected": -0.9383090138435364, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 6.879053261576145, |
| "learning_rate": 3.3848314606741575e-07, |
| "logps/chosen": -42.9251823425293, |
| "logps/rejected": -51.702964782714844, |
| "loss": 0.6139, |
| "losses/dpo": 0.6116975545883179, |
| "losses/sft": 1.40887451171875, |
| "losses/total": 0.6116975545883179, |
| "ref_logps/chosen": -37.03919982910156, |
| "ref_logps/rejected": -43.293392181396484, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.5885984301567078, |
| "rewards/margins": 0.25235864520072937, |
| "rewards/rejected": -0.8409571051597595, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 6.694865071581937, |
| "learning_rate": 3.3707865168539325e-07, |
| "logps/chosen": -42.003990173339844, |
| "logps/rejected": -50.280548095703125, |
| "loss": 0.6314, |
| "losses/dpo": 0.6589547991752625, |
| "losses/sft": 1.475001573562622, |
| "losses/total": 0.6589547991752625, |
| "ref_logps/chosen": -35.952449798583984, |
| "ref_logps/rejected": -42.320465087890625, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.6051540374755859, |
| "rewards/margins": 0.1908540576696396, |
| "rewards/rejected": -0.7960080504417419, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 6.680369593277332, |
| "learning_rate": 3.356741573033708e-07, |
| "logps/chosen": -38.10074996948242, |
| "logps/rejected": -47.0694694519043, |
| "loss": 0.6165, |
| "losses/dpo": 0.5133580565452576, |
| "losses/sft": 1.357291340827942, |
| "losses/total": 0.5133580565452576, |
| "ref_logps/chosen": -32.30625915527344, |
| "ref_logps/rejected": -38.88528060913086, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.5794489979743958, |
| "rewards/margins": 0.23896978795528412, |
| "rewards/rejected": -0.8184187412261963, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 7.163827933877828, |
| "learning_rate": 3.3426966292134826e-07, |
| "logps/chosen": -43.57667541503906, |
| "logps/rejected": -53.30217742919922, |
| "loss": 0.5908, |
| "losses/dpo": 0.574418306350708, |
| "losses/sft": 1.7125813961029053, |
| "losses/total": 0.574418306350708, |
| "ref_logps/chosen": -37.39282989501953, |
| "ref_logps/rejected": -44.2191162109375, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.6183844804763794, |
| "rewards/margins": 0.2899210453033447, |
| "rewards/rejected": -0.9083055257797241, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 7.534492030316663, |
| "learning_rate": 3.328651685393258e-07, |
| "logps/chosen": -43.87846755981445, |
| "logps/rejected": -50.815635681152344, |
| "loss": 0.6314, |
| "losses/dpo": 0.6066723465919495, |
| "losses/sft": 1.625878930091858, |
| "losses/total": 0.6066723465919495, |
| "ref_logps/chosen": -37.40098190307617, |
| "ref_logps/rejected": -42.1273193359375, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.6477489471435547, |
| "rewards/margins": 0.2210829257965088, |
| "rewards/rejected": -0.8688318729400635, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 6.461969936591063, |
| "learning_rate": 3.314606741573033e-07, |
| "logps/chosen": -41.52513122558594, |
| "logps/rejected": -50.716064453125, |
| "loss": 0.5689, |
| "losses/dpo": 0.5582201480865479, |
| "losses/sft": 1.5073952674865723, |
| "losses/total": 0.5582201480865479, |
| "ref_logps/chosen": -35.848846435546875, |
| "ref_logps/rejected": -41.70889663696289, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.567628026008606, |
| "rewards/margins": 0.333088219165802, |
| "rewards/rejected": -0.9007163047790527, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 6.942966081986398, |
| "learning_rate": 3.300561797752809e-07, |
| "logps/chosen": -45.35464859008789, |
| "logps/rejected": -48.982810974121094, |
| "loss": 0.6365, |
| "losses/dpo": 0.5614318251609802, |
| "losses/sft": 1.7859472036361694, |
| "losses/total": 0.5614318251609802, |
| "ref_logps/chosen": -39.001136779785156, |
| "ref_logps/rejected": -40.674232482910156, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.6353514790534973, |
| "rewards/margins": 0.19550636410713196, |
| "rewards/rejected": -0.8308578729629517, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 7.180092377812206, |
| "learning_rate": 3.2865168539325844e-07, |
| "logps/chosen": -45.20994186401367, |
| "logps/rejected": -52.836875915527344, |
| "loss": 0.6224, |
| "losses/dpo": 0.5493739247322083, |
| "losses/sft": 1.547910213470459, |
| "losses/total": 0.5493739247322083, |
| "ref_logps/chosen": -38.36485290527344, |
| "ref_logps/rejected": -43.65592575073242, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.684508740901947, |
| "rewards/margins": 0.2335864156484604, |
| "rewards/rejected": -0.9180951714515686, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 6.839328465422381, |
| "learning_rate": 3.2724719101123594e-07, |
| "logps/chosen": -44.02252197265625, |
| "logps/rejected": -48.20290756225586, |
| "loss": 0.6261, |
| "losses/dpo": 0.5617036819458008, |
| "losses/sft": 1.5726804733276367, |
| "losses/total": 0.5617036819458008, |
| "ref_logps/chosen": -37.56252670288086, |
| "ref_logps/rejected": -39.5811882019043, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.6459991931915283, |
| "rewards/margins": 0.21617242693901062, |
| "rewards/rejected": -0.8621717095375061, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 7.080895585848076, |
| "learning_rate": 3.258426966292135e-07, |
| "logps/chosen": -44.87802505493164, |
| "logps/rejected": -53.4232292175293, |
| "loss": 0.6444, |
| "losses/dpo": 0.5438011884689331, |
| "losses/sft": 1.6607606410980225, |
| "losses/total": 0.5438011884689331, |
| "ref_logps/chosen": -37.73242950439453, |
| "ref_logps/rejected": -44.338134765625, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.7145596146583557, |
| "rewards/margins": 0.1939493715763092, |
| "rewards/rejected": -0.9085089564323425, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 6.817784306171125, |
| "learning_rate": 3.24438202247191e-07, |
| "logps/chosen": -41.74745559692383, |
| "logps/rejected": -54.77409362792969, |
| "loss": 0.5757, |
| "losses/dpo": 0.5074477195739746, |
| "losses/sft": 1.4384926557540894, |
| "losses/total": 0.5074477195739746, |
| "ref_logps/chosen": -35.96852493286133, |
| "ref_logps/rejected": -45.66744613647461, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.5778931379318237, |
| "rewards/margins": 0.3327715992927551, |
| "rewards/rejected": -0.9106647372245789, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 6.73402019836184, |
| "learning_rate": 3.2303370786516856e-07, |
| "logps/chosen": -42.45885467529297, |
| "logps/rejected": -55.83628845214844, |
| "loss": 0.5582, |
| "losses/dpo": 0.5949134230613708, |
| "losses/sft": 1.7396336793899536, |
| "losses/total": 0.5949134230613708, |
| "ref_logps/chosen": -35.91239547729492, |
| "ref_logps/rejected": -45.41798400878906, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -0.6546458601951599, |
| "rewards/margins": 0.387184739112854, |
| "rewards/rejected": -1.0418306589126587, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.26, |
| "grad_norm": 6.484969223325532, |
| "learning_rate": 3.21629213483146e-07, |
| "logps/chosen": -39.55500030517578, |
| "logps/rejected": -53.75917053222656, |
| "loss": 0.5553, |
| "losses/dpo": 0.5379188060760498, |
| "losses/sft": 1.6754546165466309, |
| "losses/total": 0.5379188060760498, |
| "ref_logps/chosen": -33.612388610839844, |
| "ref_logps/rejected": -44.048587799072266, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.5942604541778564, |
| "rewards/margins": 0.3767976760864258, |
| "rewards/rejected": -0.971058189868927, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 7.553429925940112, |
| "learning_rate": 3.2022471910112357e-07, |
| "logps/chosen": -42.48908615112305, |
| "logps/rejected": -53.703208923339844, |
| "loss": 0.6156, |
| "losses/dpo": 0.5968649983406067, |
| "losses/sft": 1.4021852016448975, |
| "losses/total": 0.5968649983406067, |
| "ref_logps/chosen": -36.02775573730469, |
| "ref_logps/rejected": -44.81937026977539, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.6461330056190491, |
| "rewards/margins": 0.24225082993507385, |
| "rewards/rejected": -0.8883838057518005, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 6.938904519141351, |
| "learning_rate": 3.1882022471910107e-07, |
| "logps/chosen": -41.907588958740234, |
| "logps/rejected": -51.34623718261719, |
| "loss": 0.606, |
| "losses/dpo": 0.5469992756843567, |
| "losses/sft": 1.526263952255249, |
| "losses/total": 0.5469992756843567, |
| "ref_logps/chosen": -35.054046630859375, |
| "ref_logps/rejected": -41.81439971923828, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.6853541135787964, |
| "rewards/margins": 0.26782965660095215, |
| "rewards/rejected": -0.9531837701797485, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 6.455751734226468, |
| "learning_rate": 3.1741573033707863e-07, |
| "logps/chosen": -43.997074127197266, |
| "logps/rejected": -52.1095085144043, |
| "loss": 0.5652, |
| "losses/dpo": 0.5796064138412476, |
| "losses/sft": 1.7088840007781982, |
| "losses/total": 0.5796064138412476, |
| "ref_logps/chosen": -37.47534942626953, |
| "ref_logps/rejected": -42.05298614501953, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.6521726846694946, |
| "rewards/margins": 0.35347938537597656, |
| "rewards/rejected": -1.0056521892547607, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 6.925720662101029, |
| "learning_rate": 3.160112359550562e-07, |
| "logps/chosen": -42.9494743347168, |
| "logps/rejected": -51.66035461425781, |
| "loss": 0.5733, |
| "losses/dpo": 0.5771209001541138, |
| "losses/sft": 1.3783965110778809, |
| "losses/total": 0.5771209001541138, |
| "ref_logps/chosen": -36.78116989135742, |
| "ref_logps/rejected": -41.843605041503906, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.6168303489685059, |
| "rewards/margins": 0.36484503746032715, |
| "rewards/rejected": -0.981675386428833, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 7.01889024226675, |
| "learning_rate": 3.146067415730337e-07, |
| "logps/chosen": -45.94253158569336, |
| "logps/rejected": -52.16916275024414, |
| "loss": 0.6015, |
| "losses/dpo": 0.5898208618164062, |
| "losses/sft": 1.6218047142028809, |
| "losses/total": 0.5898208618164062, |
| "ref_logps/chosen": -39.25119400024414, |
| "ref_logps/rejected": -42.79926300048828, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.669133722782135, |
| "rewards/margins": 0.2678561806678772, |
| "rewards/rejected": -0.936989963054657, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 7.495610381062607, |
| "learning_rate": 3.1320224719101125e-07, |
| "logps/chosen": -44.17959976196289, |
| "logps/rejected": -53.05359649658203, |
| "loss": 0.6002, |
| "losses/dpo": 0.630514919757843, |
| "losses/sft": 1.7863552570343018, |
| "losses/total": 0.630514919757843, |
| "ref_logps/chosen": -37.75782012939453, |
| "ref_logps/rejected": -43.63001251220703, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.642177939414978, |
| "rewards/margins": 0.30018070340156555, |
| "rewards/rejected": -0.9423586130142212, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 7.4434324895050015, |
| "learning_rate": 3.1179775280898875e-07, |
| "logps/chosen": -47.16429901123047, |
| "logps/rejected": -53.266780853271484, |
| "loss": 0.6233, |
| "losses/dpo": 0.5390438437461853, |
| "losses/sft": 1.480837345123291, |
| "losses/total": 0.5390438437461853, |
| "ref_logps/chosen": -40.324798583984375, |
| "ref_logps/rejected": -44.025787353515625, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.6839500069618225, |
| "rewards/margins": 0.24014970660209656, |
| "rewards/rejected": -0.9240997433662415, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 7.0617610747390644, |
| "learning_rate": 3.103932584269663e-07, |
| "logps/chosen": -45.060882568359375, |
| "logps/rejected": -54.979156494140625, |
| "loss": 0.5891, |
| "losses/dpo": 0.5568109750747681, |
| "losses/sft": 1.6295528411865234, |
| "losses/total": 0.5568109750747681, |
| "ref_logps/chosen": -37.929840087890625, |
| "ref_logps/rejected": -44.73991394042969, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7131036520004272, |
| "rewards/margins": 0.3108205795288086, |
| "rewards/rejected": -1.0239241123199463, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 6.663174112724157, |
| "learning_rate": 3.0898876404494376e-07, |
| "logps/chosen": -42.657920837402344, |
| "logps/rejected": -47.67673110961914, |
| "loss": 0.629, |
| "losses/dpo": 0.7223004102706909, |
| "losses/sft": 1.3237264156341553, |
| "losses/total": 0.7223004102706909, |
| "ref_logps/chosen": -36.358001708984375, |
| "ref_logps/rejected": -38.938507080078125, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.6299920082092285, |
| "rewards/margins": 0.24383032321929932, |
| "rewards/rejected": -0.8738222122192383, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 7.940936921413792, |
| "learning_rate": 3.075842696629213e-07, |
| "logps/chosen": -48.067203521728516, |
| "logps/rejected": -52.38322067260742, |
| "loss": 0.6277, |
| "losses/dpo": 0.6528229117393494, |
| "losses/sft": 1.5663461685180664, |
| "losses/total": 0.6528229117393494, |
| "ref_logps/chosen": -40.93121337890625, |
| "ref_logps/rejected": -42.879188537597656, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.7135992050170898, |
| "rewards/margins": 0.23680387437343597, |
| "rewards/rejected": -0.950403094291687, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 6.9909555671219366, |
| "learning_rate": 3.0617977528089887e-07, |
| "logps/chosen": -43.74810791015625, |
| "logps/rejected": -51.602622985839844, |
| "loss": 0.6134, |
| "losses/dpo": 0.6351133584976196, |
| "losses/sft": 1.548452615737915, |
| "losses/total": 0.6351133584976196, |
| "ref_logps/chosen": -36.539649963378906, |
| "ref_logps/rejected": -41.724647521972656, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.7208462953567505, |
| "rewards/margins": 0.26695096492767334, |
| "rewards/rejected": -0.9877973794937134, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 7.516487264439432, |
| "learning_rate": 3.047752808988764e-07, |
| "logps/chosen": -45.41664123535156, |
| "logps/rejected": -52.19443130493164, |
| "loss": 0.6325, |
| "losses/dpo": 0.6936246752738953, |
| "losses/sft": 1.418731451034546, |
| "losses/total": 0.6936246752738953, |
| "ref_logps/chosen": -37.89662170410156, |
| "ref_logps/rejected": -42.46479797363281, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.7520017623901367, |
| "rewards/margins": 0.22096163034439087, |
| "rewards/rejected": -0.9729634523391724, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.36, |
| "grad_norm": 7.386682971240318, |
| "learning_rate": 3.0337078651685393e-07, |
| "logps/chosen": -44.32128143310547, |
| "logps/rejected": -56.52943420410156, |
| "loss": 0.589, |
| "losses/dpo": 0.50272536277771, |
| "losses/sft": 1.4175364971160889, |
| "losses/total": 0.50272536277771, |
| "ref_logps/chosen": -36.94242477416992, |
| "ref_logps/rejected": -45.8451042175293, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.7378860712051392, |
| "rewards/margins": 0.3305472731590271, |
| "rewards/rejected": -1.0684332847595215, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 7.070520625555024, |
| "learning_rate": 3.0196629213483144e-07, |
| "logps/chosen": -42.875099182128906, |
| "logps/rejected": -52.050384521484375, |
| "loss": 0.5815, |
| "losses/dpo": 0.6107900738716125, |
| "losses/sft": 1.7456879615783691, |
| "losses/total": 0.6107900738716125, |
| "ref_logps/chosen": -36.82639694213867, |
| "ref_logps/rejected": -42.541603088378906, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.604870617389679, |
| "rewards/margins": 0.34600716829299927, |
| "rewards/rejected": -0.9508777856826782, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 6.880606354027993, |
| "learning_rate": 3.00561797752809e-07, |
| "logps/chosen": -44.50994110107422, |
| "logps/rejected": -50.33254623413086, |
| "loss": 0.5976, |
| "losses/dpo": 0.5528784990310669, |
| "losses/sft": 1.7669578790664673, |
| "losses/total": 0.5528784990310669, |
| "ref_logps/chosen": -37.44862365722656, |
| "ref_logps/rejected": -40.247039794921875, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.7061322331428528, |
| "rewards/margins": 0.30241847038269043, |
| "rewards/rejected": -1.0085506439208984, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 7.02791538850361, |
| "learning_rate": 2.991573033707865e-07, |
| "logps/chosen": -43.87753677368164, |
| "logps/rejected": -49.34949493408203, |
| "loss": 0.6184, |
| "losses/dpo": 0.5282893180847168, |
| "losses/sft": 1.5792714357376099, |
| "losses/total": 0.5282893180847168, |
| "ref_logps/chosen": -36.59724044799805, |
| "ref_logps/rejected": -39.5660285949707, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.7280292510986328, |
| "rewards/margins": 0.25031745433807373, |
| "rewards/rejected": -0.9783467054367065, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.39, |
| "grad_norm": 7.145778710825677, |
| "learning_rate": 2.9775280898876406e-07, |
| "logps/chosen": -44.12153625488281, |
| "logps/rejected": -50.03323745727539, |
| "loss": 0.5994, |
| "losses/dpo": 0.5445400476455688, |
| "losses/sft": 1.3715200424194336, |
| "losses/total": 0.5445400476455688, |
| "ref_logps/chosen": -36.779212951660156, |
| "ref_logps/rejected": -39.96929168701172, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.7342325448989868, |
| "rewards/margins": 0.2721615731716156, |
| "rewards/rejected": -1.0063941478729248, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 7.033244427636761, |
| "learning_rate": 2.9634831460674156e-07, |
| "logps/chosen": -44.52336120605469, |
| "logps/rejected": -52.357810974121094, |
| "loss": 0.5954, |
| "losses/dpo": 0.7007085084915161, |
| "losses/sft": 1.6124120950698853, |
| "losses/total": 0.7007085084915161, |
| "ref_logps/chosen": -37.63973617553711, |
| "ref_logps/rejected": -42.18433380126953, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.6883625984191895, |
| "rewards/margins": 0.32898518443107605, |
| "rewards/rejected": -1.017347812652588, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 6.657232520968286, |
| "learning_rate": 2.9494382022471906e-07, |
| "logps/chosen": -41.616920471191406, |
| "logps/rejected": -52.240867614746094, |
| "loss": 0.5708, |
| "losses/dpo": 0.6222548484802246, |
| "losses/sft": 1.4851452112197876, |
| "losses/total": 0.6222548484802246, |
| "ref_logps/chosen": -35.22339630126953, |
| "ref_logps/rejected": -42.063621520996094, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.6393523216247559, |
| "rewards/margins": 0.37837234139442444, |
| "rewards/rejected": -1.0177247524261475, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 7.293022584639844, |
| "learning_rate": 2.935393258426966e-07, |
| "logps/chosen": -44.5795783996582, |
| "logps/rejected": -51.8583869934082, |
| "loss": 0.613, |
| "losses/dpo": 0.5626444816589355, |
| "losses/sft": 1.5801838636398315, |
| "losses/total": 0.5626444816589355, |
| "ref_logps/chosen": -37.548030853271484, |
| "ref_logps/rejected": -41.92090606689453, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.7031550407409668, |
| "rewards/margins": 0.2905934154987335, |
| "rewards/rejected": -0.9937484264373779, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 6.7646034429768385, |
| "learning_rate": 2.921348314606741e-07, |
| "logps/chosen": -38.493064880371094, |
| "logps/rejected": -52.74738693237305, |
| "loss": 0.5673, |
| "losses/dpo": 0.5454456210136414, |
| "losses/sft": 1.4984629154205322, |
| "losses/total": 0.5454456210136414, |
| "ref_logps/chosen": -31.663148880004883, |
| "ref_logps/rejected": -42.071197509765625, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.6829913854598999, |
| "rewards/margins": 0.38462772965431213, |
| "rewards/rejected": -1.0676190853118896, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 6.461217065826113, |
| "learning_rate": 2.907303370786517e-07, |
| "logps/chosen": -41.286556243896484, |
| "logps/rejected": -50.088348388671875, |
| "loss": 0.5702, |
| "losses/dpo": 0.5214348435401917, |
| "losses/sft": 1.4683022499084473, |
| "losses/total": 0.5214348435401917, |
| "ref_logps/chosen": -35.07402038574219, |
| "ref_logps/rejected": -40.096168518066406, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6212539672851562, |
| "rewards/margins": 0.377963662147522, |
| "rewards/rejected": -0.9992176294326782, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 7.005927567034465, |
| "learning_rate": 2.893258426966292e-07, |
| "logps/chosen": -40.85224533081055, |
| "logps/rejected": -47.50454330444336, |
| "loss": 0.6468, |
| "losses/dpo": 0.5624043941497803, |
| "losses/sft": 1.3726718425750732, |
| "losses/total": 0.5624043941497803, |
| "ref_logps/chosen": -34.436492919921875, |
| "ref_logps/rejected": -39.054630279541016, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.6415754556655884, |
| "rewards/margins": 0.2034158706665039, |
| "rewards/rejected": -0.8449913263320923, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 7.483874035437063, |
| "learning_rate": 2.8792134831460674e-07, |
| "logps/chosen": -42.50736999511719, |
| "logps/rejected": -58.21019744873047, |
| "loss": 0.5523, |
| "losses/dpo": 0.6698145270347595, |
| "losses/sft": 1.5408368110656738, |
| "losses/total": 0.6698145270347595, |
| "ref_logps/chosen": -36.81276321411133, |
| "ref_logps/rejected": -48.2528076171875, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.5694608688354492, |
| "rewards/margins": 0.4262778162956238, |
| "rewards/rejected": -0.995738685131073, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 6.9825083587199, |
| "learning_rate": 2.8651685393258425e-07, |
| "logps/chosen": -45.57709503173828, |
| "logps/rejected": -53.14373016357422, |
| "loss": 0.5855, |
| "losses/dpo": 0.557357132434845, |
| "losses/sft": 1.6354026794433594, |
| "losses/total": 0.557357132434845, |
| "ref_logps/chosen": -39.346527099609375, |
| "ref_logps/rejected": -43.652854919433594, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.6230565309524536, |
| "rewards/margins": 0.32603132724761963, |
| "rewards/rejected": -0.9490878582000732, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 6.9248793596215314, |
| "learning_rate": 2.851123595505618e-07, |
| "logps/chosen": -41.4918098449707, |
| "logps/rejected": -51.642330169677734, |
| "loss": 0.614, |
| "losses/dpo": 0.5539823770523071, |
| "losses/sft": 1.4280143976211548, |
| "losses/total": 0.5539823770523071, |
| "ref_logps/chosen": -34.640289306640625, |
| "ref_logps/rejected": -42.01956558227539, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.6851522326469421, |
| "rewards/margins": 0.2771243751049042, |
| "rewards/rejected": -0.9622765779495239, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 7.819580369685109, |
| "learning_rate": 2.8370786516853936e-07, |
| "logps/chosen": -45.17947769165039, |
| "logps/rejected": -54.26673126220703, |
| "loss": 0.5983, |
| "losses/dpo": 0.5532131195068359, |
| "losses/sft": 1.5786592960357666, |
| "losses/total": 0.5532131195068359, |
| "ref_logps/chosen": -38.547950744628906, |
| "ref_logps/rejected": -44.515296936035156, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.6631526350975037, |
| "rewards/margins": 0.3119913339614868, |
| "rewards/rejected": -0.9751439094543457, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 6.908089828532824, |
| "learning_rate": 2.823033707865168e-07, |
| "logps/chosen": -39.231468200683594, |
| "logps/rejected": -55.21925735473633, |
| "loss": 0.5648, |
| "losses/dpo": 0.5973429083824158, |
| "losses/sft": 1.6660652160644531, |
| "losses/total": 0.5973429083824158, |
| "ref_logps/chosen": -32.866737365722656, |
| "ref_logps/rejected": -44.704856872558594, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.6364729404449463, |
| "rewards/margins": 0.4149664640426636, |
| "rewards/rejected": -1.0514394044876099, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 6.726944591334497, |
| "learning_rate": 2.8089887640449437e-07, |
| "logps/chosen": -40.06050109863281, |
| "logps/rejected": -53.288673400878906, |
| "loss": 0.5791, |
| "losses/dpo": 0.5540711879730225, |
| "losses/sft": 1.7805967330932617, |
| "losses/total": 0.5540711879730225, |
| "ref_logps/chosen": -33.56330490112305, |
| "ref_logps/rejected": -42.997859954833984, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.649719774723053, |
| "rewards/margins": 0.37936151027679443, |
| "rewards/rejected": -1.0290813446044922, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 7.150851904029176, |
| "learning_rate": 2.794943820224719e-07, |
| "logps/chosen": -47.1893424987793, |
| "logps/rejected": -61.44281005859375, |
| "loss": 0.5702, |
| "losses/dpo": 0.7234626412391663, |
| "losses/sft": 1.6843864917755127, |
| "losses/total": 0.7234626412391663, |
| "ref_logps/chosen": -39.28502655029297, |
| "ref_logps/rejected": -49.627784729003906, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.7904319763183594, |
| "rewards/margins": 0.39107024669647217, |
| "rewards/rejected": -1.1815022230148315, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 7.427853846385361, |
| "learning_rate": 2.7808988764044943e-07, |
| "logps/chosen": -43.90837097167969, |
| "logps/rejected": -49.889678955078125, |
| "loss": 0.6097, |
| "losses/dpo": 0.5974606275558472, |
| "losses/sft": 1.7023361921310425, |
| "losses/total": 0.5974606275558472, |
| "ref_logps/chosen": -36.68721008300781, |
| "ref_logps/rejected": -39.80302047729492, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.722116231918335, |
| "rewards/margins": 0.2865493595600128, |
| "rewards/rejected": -1.0086655616760254, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 6.720052652716852, |
| "learning_rate": 2.7668539325842694e-07, |
| "logps/chosen": -40.47029495239258, |
| "logps/rejected": -52.58824157714844, |
| "loss": 0.5673, |
| "losses/dpo": 0.5275993347167969, |
| "losses/sft": 1.4116981029510498, |
| "losses/total": 0.5275993347167969, |
| "ref_logps/chosen": -33.68723678588867, |
| "ref_logps/rejected": -42.008827209472656, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.6783058643341064, |
| "rewards/margins": 0.3796355426311493, |
| "rewards/rejected": -1.0579413175582886, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 7.464608685292226, |
| "learning_rate": 2.752808988764045e-07, |
| "logps/chosen": -46.24801254272461, |
| "logps/rejected": -54.933780670166016, |
| "loss": 0.61, |
| "losses/dpo": 0.6066948771476746, |
| "losses/sft": 1.6309008598327637, |
| "losses/total": 0.6066948771476746, |
| "ref_logps/chosen": -38.81559753417969, |
| "ref_logps/rejected": -44.58855438232422, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.743241548538208, |
| "rewards/margins": 0.2912812829017639, |
| "rewards/rejected": -1.0345228910446167, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 7.360337757301619, |
| "learning_rate": 2.73876404494382e-07, |
| "logps/chosen": -42.876792907714844, |
| "logps/rejected": -50.461334228515625, |
| "loss": 0.6213, |
| "losses/dpo": 0.6310982704162598, |
| "losses/sft": 1.441427230834961, |
| "losses/total": 0.6310982704162598, |
| "ref_logps/chosen": -36.28274917602539, |
| "ref_logps/rejected": -41.36058044433594, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.6594043970108032, |
| "rewards/margins": 0.2506704330444336, |
| "rewards/rejected": -0.9100748300552368, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 7.43302729298079, |
| "learning_rate": 2.7247191011235955e-07, |
| "logps/chosen": -43.45911407470703, |
| "logps/rejected": -50.20298385620117, |
| "loss": 0.5552, |
| "losses/dpo": 0.5599596500396729, |
| "losses/sft": 1.4739470481872559, |
| "losses/total": 0.5599596500396729, |
| "ref_logps/chosen": -37.075191497802734, |
| "ref_logps/rejected": -39.420005798339844, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.6383919715881348, |
| "rewards/margins": 0.4399053752422333, |
| "rewards/rejected": -1.0782973766326904, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.53, |
| "grad_norm": 7.05544065339559, |
| "learning_rate": 2.710674157303371e-07, |
| "logps/chosen": -48.13520050048828, |
| "logps/rejected": -55.488975524902344, |
| "loss": 0.5683, |
| "losses/dpo": 0.5512528419494629, |
| "losses/sft": 1.421828269958496, |
| "losses/total": 0.5512528419494629, |
| "ref_logps/chosen": -40.806800842285156, |
| "ref_logps/rejected": -44.204917907714844, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7328400611877441, |
| "rewards/margins": 0.39556559920310974, |
| "rewards/rejected": -1.1284055709838867, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 7.072397811821575, |
| "learning_rate": 2.6966292134831456e-07, |
| "logps/chosen": -45.89094161987305, |
| "logps/rejected": -56.12247085571289, |
| "loss": 0.5687, |
| "losses/dpo": 0.5942108035087585, |
| "losses/sft": 1.671670913696289, |
| "losses/total": 0.5942108035087585, |
| "ref_logps/chosen": -38.66583251953125, |
| "ref_logps/rejected": -45.235042572021484, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.7225111722946167, |
| "rewards/margins": 0.3662317395210266, |
| "rewards/rejected": -1.088742971420288, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 7.135519070946246, |
| "learning_rate": 2.682584269662921e-07, |
| "logps/chosen": -44.51463317871094, |
| "logps/rejected": -53.46598434448242, |
| "loss": 0.5668, |
| "losses/dpo": 0.5319070816040039, |
| "losses/sft": 1.5628294944763184, |
| "losses/total": 0.5319070816040039, |
| "ref_logps/chosen": -37.279029846191406, |
| "ref_logps/rejected": -42.57787322998047, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.7235599756240845, |
| "rewards/margins": 0.36525097489356995, |
| "rewards/rejected": -1.0888110399246216, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 7.107636740157782, |
| "learning_rate": 2.668539325842696e-07, |
| "logps/chosen": -43.40117645263672, |
| "logps/rejected": -54.69598388671875, |
| "loss": 0.5524, |
| "losses/dpo": 0.5264509320259094, |
| "losses/sft": 1.5363452434539795, |
| "losses/total": 0.5264509320259094, |
| "ref_logps/chosen": -36.371551513671875, |
| "ref_logps/rejected": -43.2625732421875, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.7029624581336975, |
| "rewards/margins": 0.44037845730781555, |
| "rewards/rejected": -1.1433409452438354, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 7.289414925759057, |
| "learning_rate": 2.654494382022472e-07, |
| "logps/chosen": -42.475379943847656, |
| "logps/rejected": -49.646728515625, |
| "loss": 0.6046, |
| "losses/dpo": 0.6572248935699463, |
| "losses/sft": 1.6387099027633667, |
| "losses/total": 0.6572248935699463, |
| "ref_logps/chosen": -35.308807373046875, |
| "ref_logps/rejected": -39.25323486328125, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.7166574597358704, |
| "rewards/margins": 0.32269221544265747, |
| "rewards/rejected": -1.0393496751785278, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.57, |
| "grad_norm": 7.273876435078599, |
| "learning_rate": 2.640449438202247e-07, |
| "logps/chosen": -42.280967712402344, |
| "logps/rejected": -48.80766296386719, |
| "loss": 0.6213, |
| "losses/dpo": 0.5971169471740723, |
| "losses/sft": 1.7042605876922607, |
| "losses/total": 0.5971169471740723, |
| "ref_logps/chosen": -35.188377380371094, |
| "ref_logps/rejected": -39.048980712890625, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -0.7092592716217041, |
| "rewards/margins": 0.2666093707084656, |
| "rewards/rejected": -0.9758686423301697, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 7.389043621661051, |
| "learning_rate": 2.6264044943820224e-07, |
| "logps/chosen": -43.01720428466797, |
| "logps/rejected": -52.86360549926758, |
| "loss": 0.6004, |
| "losses/dpo": 0.5311284065246582, |
| "losses/sft": 1.673902988433838, |
| "losses/total": 0.5311284065246582, |
| "ref_logps/chosen": -35.34561538696289, |
| "ref_logps/rejected": -41.90196228027344, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.7671589851379395, |
| "rewards/margins": 0.3290054500102997, |
| "rewards/rejected": -1.096164345741272, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 7.051856361062949, |
| "learning_rate": 2.612359550561798e-07, |
| "logps/chosen": -43.410194396972656, |
| "logps/rejected": -56.95100784301758, |
| "loss": 0.5527, |
| "losses/dpo": 0.494179904460907, |
| "losses/sft": 1.3610440492630005, |
| "losses/total": 0.494179904460907, |
| "ref_logps/chosen": -36.352073669433594, |
| "ref_logps/rejected": -45.52418518066406, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.7058122754096985, |
| "rewards/margins": 0.4368700683116913, |
| "rewards/rejected": -1.1426823139190674, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.59, |
| "grad_norm": 7.075204680484654, |
| "learning_rate": 2.598314606741573e-07, |
| "logps/chosen": -44.7838249206543, |
| "logps/rejected": -52.02484130859375, |
| "loss": 0.6038, |
| "losses/dpo": 0.5901740193367004, |
| "losses/sft": 1.7182517051696777, |
| "losses/total": 0.5901740193367004, |
| "ref_logps/chosen": -37.16931915283203, |
| "ref_logps/rejected": -41.17018127441406, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.7614503502845764, |
| "rewards/margins": 0.32401591539382935, |
| "rewards/rejected": -1.0854662656784058, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 7.444039226905721, |
| "learning_rate": 2.5842696629213486e-07, |
| "logps/chosen": -41.19989776611328, |
| "logps/rejected": -49.64472961425781, |
| "loss": 0.5961, |
| "losses/dpo": 0.5703378319740295, |
| "losses/sft": 1.288915753364563, |
| "losses/total": 0.5703378319740295, |
| "ref_logps/chosen": -34.397457122802734, |
| "ref_logps/rejected": -39.43400573730469, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.6802438497543335, |
| "rewards/margins": 0.34082797169685364, |
| "rewards/rejected": -1.0210717916488647, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.61, |
| "grad_norm": 7.3272234640712455, |
| "learning_rate": 2.5702247191011236e-07, |
| "logps/chosen": -49.161766052246094, |
| "logps/rejected": -55.671295166015625, |
| "loss": 0.5862, |
| "losses/dpo": 0.7499480843544006, |
| "losses/sft": 1.8793140649795532, |
| "losses/total": 0.7499480843544006, |
| "ref_logps/chosen": -41.297489166259766, |
| "ref_logps/rejected": -44.11161422729492, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7864278554916382, |
| "rewards/margins": 0.3695400655269623, |
| "rewards/rejected": -1.1559679508209229, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 7.817780320058273, |
| "learning_rate": 2.5561797752808987e-07, |
| "logps/chosen": -46.763206481933594, |
| "logps/rejected": -53.703033447265625, |
| "loss": 0.6266, |
| "losses/dpo": 0.48047423362731934, |
| "losses/sft": 1.5680122375488281, |
| "losses/total": 0.48047423362731934, |
| "ref_logps/chosen": -39.49818801879883, |
| "ref_logps/rejected": -43.53962326049805, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.7265015840530396, |
| "rewards/margins": 0.28983935713768005, |
| "rewards/rejected": -1.016340970993042, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 7.508782666466954, |
| "learning_rate": 2.5421348314606737e-07, |
| "logps/chosen": -47.78954315185547, |
| "logps/rejected": -56.90927505493164, |
| "loss": 0.5628, |
| "losses/dpo": 0.537736177444458, |
| "losses/sft": 1.6823458671569824, |
| "losses/total": 0.537736177444458, |
| "ref_logps/chosen": -40.28362274169922, |
| "ref_logps/rejected": -45.37610626220703, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.7505923509597778, |
| "rewards/margins": 0.4027244448661804, |
| "rewards/rejected": -1.1533167362213135, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 7.806242305852612, |
| "learning_rate": 2.5280898876404493e-07, |
| "logps/chosen": -47.2044677734375, |
| "logps/rejected": -58.619651794433594, |
| "loss": 0.5899, |
| "losses/dpo": 0.6399192214012146, |
| "losses/sft": 1.363295316696167, |
| "losses/total": 0.6399192214012146, |
| "ref_logps/chosen": -39.20256423950195, |
| "ref_logps/rejected": -46.85260772705078, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.8001901507377625, |
| "rewards/margins": 0.3765140473842621, |
| "rewards/rejected": -1.1767041683197021, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.64, |
| "grad_norm": 6.621098009181271, |
| "learning_rate": 2.5140449438202243e-07, |
| "logps/chosen": -36.010169982910156, |
| "logps/rejected": -48.608699798583984, |
| "loss": 0.5501, |
| "losses/dpo": 0.5600734949111938, |
| "losses/sft": 1.3302438259124756, |
| "losses/total": 0.5600734949111938, |
| "ref_logps/chosen": -29.427637100219727, |
| "ref_logps/rejected": -37.72674560546875, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.6582531929016113, |
| "rewards/margins": 0.4299423098564148, |
| "rewards/rejected": -1.088195562362671, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 7.0657521689003735, |
| "learning_rate": 2.5e-07, |
| "logps/chosen": -42.20947265625, |
| "logps/rejected": -53.40728759765625, |
| "loss": 0.5808, |
| "losses/dpo": 0.5706441402435303, |
| "losses/sft": 1.390072226524353, |
| "losses/total": 0.5706441402435303, |
| "ref_logps/chosen": -34.44993591308594, |
| "ref_logps/rejected": -41.737003326416016, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.7759537696838379, |
| "rewards/margins": 0.3910742402076721, |
| "rewards/rejected": -1.1670279502868652, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 7.920819614767415, |
| "learning_rate": 2.485955056179775e-07, |
| "logps/chosen": -46.45621109008789, |
| "logps/rejected": -53.37653350830078, |
| "loss": 0.6258, |
| "losses/dpo": 0.5177885293960571, |
| "losses/sft": 1.4505321979522705, |
| "losses/total": 0.5177885293960571, |
| "ref_logps/chosen": -38.40946960449219, |
| "ref_logps/rejected": -42.48009490966797, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": -0.8046744465827942, |
| "rewards/margins": 0.2849688231945038, |
| "rewards/rejected": -1.0896432399749756, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.66, |
| "grad_norm": 6.775480921328623, |
| "learning_rate": 2.4719101123595505e-07, |
| "logps/chosen": -43.19866943359375, |
| "logps/rejected": -51.142852783203125, |
| "loss": 0.5708, |
| "losses/dpo": 0.631821870803833, |
| "losses/sft": 1.687159776687622, |
| "losses/total": 0.631821870803833, |
| "ref_logps/chosen": -35.93583297729492, |
| "ref_logps/rejected": -40.11430358886719, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.7262836694717407, |
| "rewards/margins": 0.3765709102153778, |
| "rewards/rejected": -1.102854609489441, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 7.00534024427554, |
| "learning_rate": 2.4578651685393255e-07, |
| "logps/chosen": -43.32523727416992, |
| "logps/rejected": -52.18841552734375, |
| "loss": 0.56, |
| "losses/dpo": 0.5959673523902893, |
| "losses/sft": 1.5886725187301636, |
| "losses/total": 0.5959673523902893, |
| "ref_logps/chosen": -35.884517669677734, |
| "ref_logps/rejected": -40.54563522338867, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.7440718412399292, |
| "rewards/margins": 0.42020630836486816, |
| "rewards/rejected": -1.1642781496047974, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.68, |
| "grad_norm": 7.150852349968827, |
| "learning_rate": 2.443820224719101e-07, |
| "logps/chosen": -42.47400665283203, |
| "logps/rejected": -53.8497314453125, |
| "loss": 0.5456, |
| "losses/dpo": 0.5008928775787354, |
| "losses/sft": 1.4967145919799805, |
| "losses/total": 0.5008928775787354, |
| "ref_logps/chosen": -35.4063720703125, |
| "ref_logps/rejected": -41.911190032958984, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.7067632675170898, |
| "rewards/margins": 0.48709067702293396, |
| "rewards/rejected": -1.1938539743423462, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.68, |
| "grad_norm": 7.009242529601585, |
| "learning_rate": 2.429775280898876e-07, |
| "logps/chosen": -42.480735778808594, |
| "logps/rejected": -56.96538543701172, |
| "loss": 0.5687, |
| "losses/dpo": 0.5494006872177124, |
| "losses/sft": 1.660073161125183, |
| "losses/total": 0.5494006872177124, |
| "ref_logps/chosen": -34.94923400878906, |
| "ref_logps/rejected": -45.03327178955078, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.753150224685669, |
| "rewards/margins": 0.44006073474884033, |
| "rewards/rejected": -1.1932109594345093, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 7.145198782494123, |
| "learning_rate": 2.4157303370786517e-07, |
| "logps/chosen": -46.52253341674805, |
| "logps/rejected": -56.88560485839844, |
| "loss": 0.5578, |
| "losses/dpo": 0.6753450632095337, |
| "losses/sft": 1.733784556388855, |
| "losses/total": 0.6753450632095337, |
| "ref_logps/chosen": -38.87651062011719, |
| "ref_logps/rejected": -44.95452117919922, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.7646023035049438, |
| "rewards/margins": 0.428506076335907, |
| "rewards/rejected": -1.193108320236206, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 7.2140738897995895, |
| "learning_rate": 2.401685393258427e-07, |
| "logps/chosen": -44.606842041015625, |
| "logps/rejected": -51.53977966308594, |
| "loss": 0.5755, |
| "losses/dpo": 0.6050464510917664, |
| "losses/sft": 1.4844509363174438, |
| "losses/total": 0.6050464510917664, |
| "ref_logps/chosen": -37.366485595703125, |
| "ref_logps/rejected": -40.17390441894531, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.7240351438522339, |
| "rewards/margins": 0.41255253553390503, |
| "rewards/rejected": -1.1365876197814941, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 7.316056461598082, |
| "learning_rate": 2.3876404494382023e-07, |
| "logps/chosen": -43.40976333618164, |
| "logps/rejected": -51.119468688964844, |
| "loss": 0.5856, |
| "losses/dpo": 0.6437182426452637, |
| "losses/sft": 1.6879228353500366, |
| "losses/total": 0.6437182426452637, |
| "ref_logps/chosen": -35.9763298034668, |
| "ref_logps/rejected": -39.941932678222656, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.7433432936668396, |
| "rewards/margins": 0.37441009283065796, |
| "rewards/rejected": -1.1177533864974976, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 7.078331857989057, |
| "learning_rate": 2.3735955056179774e-07, |
| "logps/chosen": -45.81120681762695, |
| "logps/rejected": -50.85576629638672, |
| "loss": 0.6076, |
| "losses/dpo": 0.7625922560691833, |
| "losses/sft": 1.5723658800125122, |
| "losses/total": 0.7625922560691833, |
| "ref_logps/chosen": -38.21784973144531, |
| "ref_logps/rejected": -40.007240295410156, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.7593356966972351, |
| "rewards/margins": 0.32551684975624084, |
| "rewards/rejected": -1.0848525762557983, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 7.206138039626543, |
| "learning_rate": 2.3595505617977527e-07, |
| "logps/chosen": -43.7403450012207, |
| "logps/rejected": -52.108604431152344, |
| "loss": 0.5922, |
| "losses/dpo": 0.5139514803886414, |
| "losses/sft": 1.6670148372650146, |
| "losses/total": 0.5139514803886414, |
| "ref_logps/chosen": -35.650115966796875, |
| "ref_logps/rejected": -40.582130432128906, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.8090231418609619, |
| "rewards/margins": 0.34362420439720154, |
| "rewards/rejected": -1.1526473760604858, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.73, |
| "grad_norm": 7.61900579513634, |
| "learning_rate": 2.345505617977528e-07, |
| "logps/chosen": -42.43614959716797, |
| "logps/rejected": -52.779483795166016, |
| "loss": 0.5781, |
| "losses/dpo": 0.4573014974594116, |
| "losses/sft": 1.5003488063812256, |
| "losses/total": 0.4573014974594116, |
| "ref_logps/chosen": -34.90065002441406, |
| "ref_logps/rejected": -41.4276237487793, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.7535501718521118, |
| "rewards/margins": 0.38163578510284424, |
| "rewards/rejected": -1.135185956954956, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 7.829509763007773, |
| "learning_rate": 2.331460674157303e-07, |
| "logps/chosen": -47.7276496887207, |
| "logps/rejected": -56.36402893066406, |
| "loss": 0.5302, |
| "losses/dpo": 0.529563307762146, |
| "losses/sft": 1.6256301403045654, |
| "losses/total": 0.529563307762146, |
| "ref_logps/chosen": -40.326351165771484, |
| "ref_logps/rejected": -43.704612731933594, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.740129828453064, |
| "rewards/margins": 0.5258119702339172, |
| "rewards/rejected": -1.2659417390823364, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 8.06687120109026, |
| "learning_rate": 2.3174157303370786e-07, |
| "logps/chosen": -44.74425506591797, |
| "logps/rejected": -55.536312103271484, |
| "loss": 0.561, |
| "losses/dpo": 0.4758527874946594, |
| "losses/sft": 1.3779159784317017, |
| "losses/total": 0.4758527874946594, |
| "ref_logps/chosen": -37.057281494140625, |
| "ref_logps/rejected": -43.34575653076172, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.7686972618103027, |
| "rewards/margins": 0.45035821199417114, |
| "rewards/rejected": -1.2190555334091187, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 7.291686332994008, |
| "learning_rate": 2.303370786516854e-07, |
| "logps/chosen": -43.00548553466797, |
| "logps/rejected": -54.49897003173828, |
| "loss": 0.5834, |
| "losses/dpo": 0.5421339273452759, |
| "losses/sft": 1.4051011800765991, |
| "losses/total": 0.5421339273452759, |
| "ref_logps/chosen": -35.81233215332031, |
| "ref_logps/rejected": -43.77638244628906, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.7193150520324707, |
| "rewards/margins": 0.35294392704963684, |
| "rewards/rejected": -1.0722589492797852, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 6.669119014424567, |
| "learning_rate": 2.2893258426966292e-07, |
| "logps/chosen": -42.595909118652344, |
| "logps/rejected": -50.517574310302734, |
| "loss": 0.5942, |
| "losses/dpo": 0.5890272855758667, |
| "losses/sft": 1.3421604633331299, |
| "losses/total": 0.5890272855758667, |
| "ref_logps/chosen": -35.23419189453125, |
| "ref_logps/rejected": -39.80196762084961, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.7361720204353333, |
| "rewards/margins": 0.33538877964019775, |
| "rewards/rejected": -1.0715608596801758, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 7.634998140383259, |
| "learning_rate": 2.2752808988764045e-07, |
| "logps/chosen": -48.97822189331055, |
| "logps/rejected": -55.01988983154297, |
| "loss": 0.6041, |
| "losses/dpo": 0.4961914122104645, |
| "losses/sft": 1.6347143650054932, |
| "losses/total": 0.4961914122104645, |
| "ref_logps/chosen": -40.457027435302734, |
| "ref_logps/rejected": -43.25347900390625, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.8521193265914917, |
| "rewards/margins": 0.3245222866535187, |
| "rewards/rejected": -1.1766417026519775, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 7.773471295460603, |
| "learning_rate": 2.2612359550561795e-07, |
| "logps/chosen": -46.672576904296875, |
| "logps/rejected": -54.91902542114258, |
| "loss": 0.5883, |
| "losses/dpo": 0.5112382173538208, |
| "losses/sft": 1.6069546937942505, |
| "losses/total": 0.5112382173538208, |
| "ref_logps/chosen": -38.114097595214844, |
| "ref_logps/rejected": -42.68096160888672, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.8558481931686401, |
| "rewards/margins": 0.36795809864997864, |
| "rewards/rejected": -1.2238062620162964, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 6.9224359595021925, |
| "learning_rate": 2.2471910112359549e-07, |
| "logps/chosen": -43.26789855957031, |
| "logps/rejected": -49.846065521240234, |
| "loss": 0.5976, |
| "losses/dpo": 0.530718207359314, |
| "losses/sft": 1.4825395345687866, |
| "losses/total": 0.530718207359314, |
| "ref_logps/chosen": -35.522216796875, |
| "ref_logps/rejected": -38.95735168457031, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.7745683193206787, |
| "rewards/margins": 0.3143025040626526, |
| "rewards/rejected": -1.088870882987976, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 7.5835387702946075, |
| "learning_rate": 2.2331460674157302e-07, |
| "logps/chosen": -44.358116149902344, |
| "logps/rejected": -57.51253890991211, |
| "loss": 0.5667, |
| "losses/dpo": 0.4763038754463196, |
| "losses/sft": 1.4994385242462158, |
| "losses/total": 0.4763038754463196, |
| "ref_logps/chosen": -36.64021682739258, |
| "ref_logps/rejected": -45.455902099609375, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.7717897295951843, |
| "rewards/margins": 0.4338740408420563, |
| "rewards/rejected": -1.205663800239563, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 6.860345248290717, |
| "learning_rate": 2.2191011235955055e-07, |
| "logps/chosen": -43.448211669921875, |
| "logps/rejected": -52.67967224121094, |
| "loss": 0.5841, |
| "losses/dpo": 0.5856455564498901, |
| "losses/sft": 1.5493735074996948, |
| "losses/total": 0.5856455564498901, |
| "ref_logps/chosen": -35.36868667602539, |
| "ref_logps/rejected": -40.977325439453125, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.8079524040222168, |
| "rewards/margins": 0.36228203773498535, |
| "rewards/rejected": -1.1702344417572021, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 6.877362645097382, |
| "learning_rate": 2.205056179775281e-07, |
| "logps/chosen": -43.96727752685547, |
| "logps/rejected": -54.08544921875, |
| "loss": 0.561, |
| "losses/dpo": 0.7524189352989197, |
| "losses/sft": 1.4943475723266602, |
| "losses/total": 0.7524189352989197, |
| "ref_logps/chosen": -36.482398986816406, |
| "ref_logps/rejected": -42.11739730834961, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.7484874725341797, |
| "rewards/margins": 0.44831788539886475, |
| "rewards/rejected": -1.1968053579330444, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 8.176287850888796, |
| "learning_rate": 2.191011235955056e-07, |
| "logps/chosen": -44.187679290771484, |
| "logps/rejected": -52.56245422363281, |
| "loss": 0.6215, |
| "losses/dpo": 0.5882298946380615, |
| "losses/sft": 1.509756326675415, |
| "losses/total": 0.5882298946380615, |
| "ref_logps/chosen": -35.26359558105469, |
| "ref_logps/rejected": -40.418216705322266, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.8924084901809692, |
| "rewards/margins": 0.32201528549194336, |
| "rewards/rejected": -1.2144238948822021, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.82, |
| "grad_norm": 6.748177391589757, |
| "learning_rate": 2.1769662921348314e-07, |
| "logps/chosen": -44.592193603515625, |
| "logps/rejected": -54.5892219543457, |
| "loss": 0.5454, |
| "losses/dpo": 0.5010501742362976, |
| "losses/sft": 1.692970871925354, |
| "losses/total": 0.5010501742362976, |
| "ref_logps/chosen": -36.254737854003906, |
| "ref_logps/rejected": -41.49382019042969, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.8337457180023193, |
| "rewards/margins": 0.47579440474510193, |
| "rewards/rejected": -1.3095402717590332, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 7.9524609339083385, |
| "learning_rate": 2.1629213483146067e-07, |
| "logps/chosen": -49.09219741821289, |
| "logps/rejected": -55.76482391357422, |
| "loss": 0.6033, |
| "losses/dpo": 0.5440762042999268, |
| "losses/sft": 1.7360166311264038, |
| "losses/total": 0.5440762042999268, |
| "ref_logps/chosen": -39.933494567871094, |
| "ref_logps/rejected": -43.29194259643555, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.915870189666748, |
| "rewards/margins": 0.33141782879829407, |
| "rewards/rejected": -1.2472879886627197, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 7.715847211838437, |
| "learning_rate": 2.148876404494382e-07, |
| "logps/chosen": -44.42055130004883, |
| "logps/rejected": -50.31025314331055, |
| "loss": 0.6419, |
| "losses/dpo": 0.6423018574714661, |
| "losses/sft": 1.8698339462280273, |
| "losses/total": 0.6423018574714661, |
| "ref_logps/chosen": -36.14445114135742, |
| "ref_logps/rejected": -39.49055480957031, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.8276099562644958, |
| "rewards/margins": 0.2543600797653198, |
| "rewards/rejected": -1.081969976425171, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.84, |
| "grad_norm": 7.717852694753547, |
| "learning_rate": 2.134831460674157e-07, |
| "logps/chosen": -45.71333694458008, |
| "logps/rejected": -56.663360595703125, |
| "loss": 0.5667, |
| "losses/dpo": 0.578036904335022, |
| "losses/sft": 1.548266053199768, |
| "losses/total": 0.578036904335022, |
| "ref_logps/chosen": -37.3514404296875, |
| "ref_logps/rejected": -44.125831604003906, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.8361901044845581, |
| "rewards/margins": 0.4175630807876587, |
| "rewards/rejected": -1.2537531852722168, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 7.217778469739938, |
| "learning_rate": 2.1207865168539323e-07, |
| "logps/chosen": -47.56121826171875, |
| "logps/rejected": -55.15635299682617, |
| "loss": 0.584, |
| "losses/dpo": 0.6499341726303101, |
| "losses/sft": 1.8146308660507202, |
| "losses/total": 0.6499341726303101, |
| "ref_logps/chosen": -39.167484283447266, |
| "ref_logps/rejected": -43.06206130981445, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.8393731117248535, |
| "rewards/margins": 0.37005579471588135, |
| "rewards/rejected": -1.2094289064407349, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 7.706531034729977, |
| "learning_rate": 2.1067415730337076e-07, |
| "logps/chosen": -45.61647415161133, |
| "logps/rejected": -55.11760330200195, |
| "loss": 0.616, |
| "losses/dpo": 0.45806318521499634, |
| "losses/sft": 1.4561158418655396, |
| "losses/total": 0.45806318521499634, |
| "ref_logps/chosen": -36.999488830566406, |
| "ref_logps/rejected": -43.26171112060547, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.8616988062858582, |
| "rewards/margins": 0.3238902986049652, |
| "rewards/rejected": -1.185589075088501, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 7.805010195142929, |
| "learning_rate": 2.0926966292134832e-07, |
| "logps/chosen": -43.88758087158203, |
| "logps/rejected": -54.20425033569336, |
| "loss": 0.5977, |
| "losses/dpo": 0.6152101755142212, |
| "losses/sft": 1.5027949810028076, |
| "losses/total": 0.6152101755142212, |
| "ref_logps/chosen": -35.95214080810547, |
| "ref_logps/rejected": -42.84498977661133, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7935442924499512, |
| "rewards/margins": 0.3423812687397003, |
| "rewards/rejected": -1.135925531387329, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 7.406497027707841, |
| "learning_rate": 2.0786516853932585e-07, |
| "logps/chosen": -45.788818359375, |
| "logps/rejected": -53.047203063964844, |
| "loss": 0.5831, |
| "losses/dpo": 0.4654901325702667, |
| "losses/sft": 1.479446291923523, |
| "losses/total": 0.4654901325702667, |
| "ref_logps/chosen": -37.57151794433594, |
| "ref_logps/rejected": -41.39836883544922, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.8217304348945618, |
| "rewards/margins": 0.34315240383148193, |
| "rewards/rejected": -1.1648828983306885, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 7.682816189246604, |
| "learning_rate": 2.0646067415730336e-07, |
| "logps/chosen": -45.08941650390625, |
| "logps/rejected": -56.03681182861328, |
| "loss": 0.6254, |
| "losses/dpo": 0.6420303583145142, |
| "losses/sft": 1.767283320426941, |
| "losses/total": 0.6420303583145142, |
| "ref_logps/chosen": -35.47539138793945, |
| "ref_logps/rejected": -43.610809326171875, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.9614025950431824, |
| "rewards/margins": 0.281198114156723, |
| "rewards/rejected": -1.242600679397583, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 7.381909400967013, |
| "learning_rate": 2.0505617977528089e-07, |
| "logps/chosen": -44.02425765991211, |
| "logps/rejected": -57.3465461730957, |
| "loss": 0.552, |
| "losses/dpo": 0.5316831469535828, |
| "losses/sft": 1.4193787574768066, |
| "losses/total": 0.5316831469535828, |
| "ref_logps/chosen": -36.48744201660156, |
| "ref_logps/rejected": -45.4715576171875, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7536818385124207, |
| "rewards/margins": 0.43381738662719727, |
| "rewards/rejected": -1.1874991655349731, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.89, |
| "grad_norm": 7.457988261963576, |
| "learning_rate": 2.0365168539325842e-07, |
| "logps/chosen": -44.05774688720703, |
| "logps/rejected": -54.26824951171875, |
| "loss": 0.5622, |
| "losses/dpo": 0.6149340867996216, |
| "losses/sft": 1.7144936323165894, |
| "losses/total": 0.6149340867996216, |
| "ref_logps/chosen": -35.81959533691406, |
| "ref_logps/rejected": -41.808128356933594, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.8238149881362915, |
| "rewards/margins": 0.4221975803375244, |
| "rewards/rejected": -1.246012568473816, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 6.886174599694686, |
| "learning_rate": 2.0224719101123595e-07, |
| "logps/chosen": -42.96266174316406, |
| "logps/rejected": -57.41224670410156, |
| "loss": 0.5338, |
| "losses/dpo": 0.5937738418579102, |
| "losses/sft": 1.7894150018692017, |
| "losses/total": 0.5937738418579102, |
| "ref_logps/chosen": -35.350067138671875, |
| "ref_logps/rejected": -44.520416259765625, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.7612596750259399, |
| "rewards/margins": 0.5279234051704407, |
| "rewards/rejected": -1.2891831398010254, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.91, |
| "grad_norm": 7.229890008822798, |
| "learning_rate": 2.0084269662921348e-07, |
| "logps/chosen": -40.15806198120117, |
| "logps/rejected": -51.43259811401367, |
| "loss": 0.5704, |
| "losses/dpo": 0.6577882170677185, |
| "losses/sft": 1.8345617055892944, |
| "losses/total": 0.6577882170677185, |
| "ref_logps/chosen": -32.859107971191406, |
| "ref_logps/rejected": -40.261077880859375, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7298952341079712, |
| "rewards/margins": 0.3872564733028412, |
| "rewards/rejected": -1.1171517372131348, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 7.4822334369379995, |
| "learning_rate": 1.9943820224719098e-07, |
| "logps/chosen": -47.686946868896484, |
| "logps/rejected": -57.150779724121094, |
| "loss": 0.5379, |
| "losses/dpo": 0.5903155207633972, |
| "losses/sft": 1.7529627084732056, |
| "losses/total": 0.5903155207633972, |
| "ref_logps/chosen": -39.64442443847656, |
| "ref_logps/rejected": -44.537864685058594, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.8042521476745605, |
| "rewards/margins": 0.45703911781311035, |
| "rewards/rejected": -1.2612911462783813, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 8.026318217758316, |
| "learning_rate": 1.9803370786516854e-07, |
| "logps/chosen": -48.09050750732422, |
| "logps/rejected": -55.54762268066406, |
| "loss": 0.6168, |
| "losses/dpo": 0.6026707887649536, |
| "losses/sft": 1.538877248764038, |
| "losses/total": 0.6026707887649536, |
| "ref_logps/chosen": -39.93636703491211, |
| "ref_logps/rejected": -44.63798522949219, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.8154144287109375, |
| "rewards/margins": 0.2755492627620697, |
| "rewards/rejected": -1.0909637212753296, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.93, |
| "grad_norm": 7.789382763460605, |
| "learning_rate": 1.9662921348314607e-07, |
| "logps/chosen": -42.904396057128906, |
| "logps/rejected": -52.95304489135742, |
| "loss": 0.6004, |
| "losses/dpo": 0.6533941626548767, |
| "losses/sft": 1.7555681467056274, |
| "losses/total": 0.6533941626548767, |
| "ref_logps/chosen": -35.12152862548828, |
| "ref_logps/rejected": -41.70171356201172, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.7782862186431885, |
| "rewards/margins": 0.34684672951698303, |
| "rewards/rejected": -1.1251329183578491, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 7.650755358628509, |
| "learning_rate": 1.952247191011236e-07, |
| "logps/chosen": -47.547119140625, |
| "logps/rejected": -55.00044250488281, |
| "loss": 0.5857, |
| "losses/dpo": 0.533769965171814, |
| "losses/sft": 1.518601655960083, |
| "losses/total": 0.533769965171814, |
| "ref_logps/chosen": -39.58103942871094, |
| "ref_logps/rejected": -43.044471740722656, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.7966080904006958, |
| "rewards/margins": 0.3989890217781067, |
| "rewards/rejected": -1.1955971717834473, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 7.218761250045399, |
| "learning_rate": 1.938202247191011e-07, |
| "logps/chosen": -45.877933502197266, |
| "logps/rejected": -55.09804916381836, |
| "loss": 0.5628, |
| "losses/dpo": 0.5916406512260437, |
| "losses/sft": 1.787639856338501, |
| "losses/total": 0.5916406512260437, |
| "ref_logps/chosen": -37.8803596496582, |
| "ref_logps/rejected": -42.772945404052734, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.7997570633888245, |
| "rewards/margins": 0.43275338411331177, |
| "rewards/rejected": -1.2325104475021362, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 6.746342603050737, |
| "learning_rate": 1.9241573033707863e-07, |
| "logps/chosen": -44.5426139831543, |
| "logps/rejected": -52.97711944580078, |
| "loss": 0.5404, |
| "losses/dpo": 0.6297707557678223, |
| "losses/sft": 1.9339282512664795, |
| "losses/total": 0.6297707557678223, |
| "ref_logps/chosen": -36.8856315612793, |
| "ref_logps/rejected": -40.58320617675781, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7656983137130737, |
| "rewards/margins": 0.47369277477264404, |
| "rewards/rejected": -1.2393909692764282, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 7.487018325482117, |
| "learning_rate": 1.9101123595505617e-07, |
| "logps/chosen": -42.5137825012207, |
| "logps/rejected": -53.166908264160156, |
| "loss": 0.5707, |
| "losses/dpo": 0.7028491497039795, |
| "losses/sft": 1.704848289489746, |
| "losses/total": 0.7028491497039795, |
| "ref_logps/chosen": -34.8709716796875, |
| "ref_logps/rejected": -41.41071701049805, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.7642812728881836, |
| "rewards/margins": 0.4113379120826721, |
| "rewards/rejected": -1.1756192445755005, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.97, |
| "grad_norm": 6.897909731781275, |
| "learning_rate": 1.896067415730337e-07, |
| "logps/chosen": -42.64958190917969, |
| "logps/rejected": -54.01194763183594, |
| "loss": 0.5508, |
| "losses/dpo": 0.6007636785507202, |
| "losses/sft": 1.6722173690795898, |
| "losses/total": 0.6007636785507202, |
| "ref_logps/chosen": -34.81106948852539, |
| "ref_logps/rejected": -41.885292053222656, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7838513851165771, |
| "rewards/margins": 0.428814560174942, |
| "rewards/rejected": -1.2126659154891968, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 6.802922485274152, |
| "learning_rate": 1.8820224719101123e-07, |
| "logps/chosen": -40.00798034667969, |
| "logps/rejected": -54.30394744873047, |
| "loss": 0.5499, |
| "losses/dpo": 0.39324456453323364, |
| "losses/sft": 1.4311751127243042, |
| "losses/total": 0.39324456453323364, |
| "ref_logps/chosen": -32.748207092285156, |
| "ref_logps/rejected": -42.49259948730469, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.7259770035743713, |
| "rewards/margins": 0.4551584720611572, |
| "rewards/rejected": -1.1811354160308838, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 6.783836899709174, |
| "learning_rate": 1.8679775280898876e-07, |
| "logps/chosen": -39.83095932006836, |
| "logps/rejected": -54.880165100097656, |
| "loss": 0.5218, |
| "losses/dpo": 0.5562885999679565, |
| "losses/sft": 1.581786036491394, |
| "losses/total": 0.5562885999679565, |
| "ref_logps/chosen": -32.503440856933594, |
| "ref_logps/rejected": -42.00752258300781, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.7327523231506348, |
| "rewards/margins": 0.5545117259025574, |
| "rewards/rejected": -1.287264108657837, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 7.863916442503097, |
| "learning_rate": 1.853932584269663e-07, |
| "logps/chosen": -50.77809524536133, |
| "logps/rejected": -57.57705307006836, |
| "loss": 0.5746, |
| "losses/dpo": 0.5502392053604126, |
| "losses/sft": 1.671476125717163, |
| "losses/total": 0.5502392053604126, |
| "ref_logps/chosen": -42.0257568359375, |
| "ref_logps/rejected": -44.61543273925781, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.8752338886260986, |
| "rewards/margins": 0.42092812061309814, |
| "rewards/rejected": -1.2961618900299072, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 7.397057411594154, |
| "learning_rate": 1.8398876404494382e-07, |
| "logps/chosen": -45.31150817871094, |
| "logps/rejected": -54.171669006347656, |
| "loss": 0.5905, |
| "losses/dpo": 0.5974111557006836, |
| "losses/sft": 1.7264142036437988, |
| "losses/total": 0.5974111557006836, |
| "ref_logps/chosen": -36.78529357910156, |
| "ref_logps/rejected": -41.80047607421875, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.8526214361190796, |
| "rewards/margins": 0.3844982385635376, |
| "rewards/rejected": -1.2371195554733276, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.01, |
| "grad_norm": 7.017591256124865, |
| "learning_rate": 1.8258426966292135e-07, |
| "logps/chosen": -43.99406433105469, |
| "logps/rejected": -53.245262145996094, |
| "loss": 0.5349, |
| "losses/dpo": 0.5464926362037659, |
| "losses/sft": 1.5807067155838013, |
| "losses/total": 0.5464926362037659, |
| "ref_logps/chosen": -36.38019561767578, |
| "ref_logps/rejected": -40.543190002441406, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.761387288570404, |
| "rewards/margins": 0.5088198184967041, |
| "rewards/rejected": -1.2702070474624634, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.02, |
| "grad_norm": 6.985219378495969, |
| "learning_rate": 1.8117977528089888e-07, |
| "logps/chosen": -44.4912109375, |
| "logps/rejected": -53.347251892089844, |
| "loss": 0.5743, |
| "losses/dpo": 0.5905557870864868, |
| "losses/sft": 1.7700715065002441, |
| "losses/total": 0.5905557870864868, |
| "ref_logps/chosen": -36.70296859741211, |
| "ref_logps/rejected": -41.48924255371094, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.7788243889808655, |
| "rewards/margins": 0.40697669982910156, |
| "rewards/rejected": -1.1858012676239014, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.02, |
| "grad_norm": 7.021871568299538, |
| "learning_rate": 1.7977528089887638e-07, |
| "logps/chosen": -41.247291564941406, |
| "logps/rejected": -50.476539611816406, |
| "loss": 0.5583, |
| "losses/dpo": 0.5536283850669861, |
| "losses/sft": 1.3929085731506348, |
| "losses/total": 0.5536283850669861, |
| "ref_logps/chosen": -34.45301055908203, |
| "ref_logps/rejected": -39.05378723144531, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.6794286370277405, |
| "rewards/margins": 0.46284645795822144, |
| "rewards/rejected": -1.142275094985962, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.03, |
| "grad_norm": 7.290879700745406, |
| "learning_rate": 1.7837078651685391e-07, |
| "logps/chosen": -44.69060516357422, |
| "logps/rejected": -52.723419189453125, |
| "loss": 0.5862, |
| "losses/dpo": 0.5445826053619385, |
| "losses/sft": 1.8489296436309814, |
| "losses/total": 0.5445826053619385, |
| "ref_logps/chosen": -36.05701446533203, |
| "ref_logps/rejected": -40.36290740966797, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.863358736038208, |
| "rewards/margins": 0.37269291281700134, |
| "rewards/rejected": -1.2360515594482422, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 7.278112500918291, |
| "learning_rate": 1.7696629213483144e-07, |
| "logps/chosen": -47.17387771606445, |
| "logps/rejected": -55.31304168701172, |
| "loss": 0.5451, |
| "losses/dpo": 0.4929129481315613, |
| "losses/sft": 1.2738251686096191, |
| "losses/total": 0.4929129481315613, |
| "ref_logps/chosen": -38.97121810913086, |
| "ref_logps/rejected": -42.48224639892578, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.8202658891677856, |
| "rewards/margins": 0.4628136157989502, |
| "rewards/rejected": -1.2830795049667358, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 7.2790694246, |
| "learning_rate": 1.75561797752809e-07, |
| "logps/chosen": -39.39592742919922, |
| "logps/rejected": -49.23228073120117, |
| "loss": 0.5839, |
| "losses/dpo": 0.5500213503837585, |
| "losses/sft": 1.5326621532440186, |
| "losses/total": 0.5500213503837585, |
| "ref_logps/chosen": -31.87863540649414, |
| "ref_logps/rejected": -37.98380661010742, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.7517290115356445, |
| "rewards/margins": 0.37311792373657227, |
| "rewards/rejected": -1.1248469352722168, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 6.952262857114201, |
| "learning_rate": 1.741573033707865e-07, |
| "logps/chosen": -41.979820251464844, |
| "logps/rejected": -51.27606964111328, |
| "loss": 0.5477, |
| "losses/dpo": 0.5781035423278809, |
| "losses/sft": 1.6893967390060425, |
| "losses/total": 0.5781035423278809, |
| "ref_logps/chosen": -34.47309875488281, |
| "ref_logps/rejected": -38.76087951660156, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7506722211837769, |
| "rewards/margins": 0.5008465051651001, |
| "rewards/rejected": -1.251518726348877, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.06, |
| "grad_norm": 7.323213695486467, |
| "learning_rate": 1.7275280898876404e-07, |
| "logps/chosen": -46.38153839111328, |
| "logps/rejected": -57.915809631347656, |
| "loss": 0.5002, |
| "losses/dpo": 0.5204892754554749, |
| "losses/sft": 1.5103009939193726, |
| "losses/total": 0.5204892754554749, |
| "ref_logps/chosen": -38.351890563964844, |
| "ref_logps/rejected": -43.90599060058594, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.8029646873474121, |
| "rewards/margins": 0.5980167388916016, |
| "rewards/rejected": -1.4009814262390137, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.07, |
| "grad_norm": 7.764155916683402, |
| "learning_rate": 1.7134831460674157e-07, |
| "logps/chosen": -45.19919967651367, |
| "logps/rejected": -51.13863754272461, |
| "loss": 0.6283, |
| "losses/dpo": 0.616185188293457, |
| "losses/sft": 1.6811277866363525, |
| "losses/total": 0.616185188293457, |
| "ref_logps/chosen": -36.72953796386719, |
| "ref_logps/rejected": -39.85737228393555, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -0.8469663858413696, |
| "rewards/margins": 0.2811599373817444, |
| "rewards/rejected": -1.1281262636184692, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 7.020433782892144, |
| "learning_rate": 1.699438202247191e-07, |
| "logps/chosen": -43.26371765136719, |
| "logps/rejected": -52.273712158203125, |
| "loss": 0.5707, |
| "losses/dpo": 0.584823727607727, |
| "losses/sft": 1.7780404090881348, |
| "losses/total": 0.584823727607727, |
| "ref_logps/chosen": -34.843936920166016, |
| "ref_logps/rejected": -39.69860076904297, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.8419777154922485, |
| "rewards/margins": 0.4155334234237671, |
| "rewards/rejected": -1.2575111389160156, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 7.92264626489854, |
| "learning_rate": 1.6853932584269663e-07, |
| "logps/chosen": -47.98881912231445, |
| "logps/rejected": -56.07038116455078, |
| "loss": 0.5876, |
| "losses/dpo": 0.4782869219779968, |
| "losses/sft": 1.5796866416931152, |
| "losses/total": 0.4782869219779968, |
| "ref_logps/chosen": -39.32164764404297, |
| "ref_logps/rejected": -43.3940315246582, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.8667174577713013, |
| "rewards/margins": 0.40091750025749207, |
| "rewards/rejected": -1.2676348686218262, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.09, |
| "grad_norm": 6.857885259771192, |
| "learning_rate": 1.6713483146067413e-07, |
| "logps/chosen": -42.90391159057617, |
| "logps/rejected": -53.66696548461914, |
| "loss": 0.5666, |
| "losses/dpo": 0.6092857122421265, |
| "losses/sft": 1.6311126947402954, |
| "losses/total": 0.6092857122421265, |
| "ref_logps/chosen": -35.07619857788086, |
| "ref_logps/rejected": -41.65775680541992, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.7827714085578918, |
| "rewards/margins": 0.4181497395038605, |
| "rewards/rejected": -1.2009210586547852, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 7.0081480343548215, |
| "learning_rate": 1.6573033707865166e-07, |
| "logps/chosen": -43.48851013183594, |
| "logps/rejected": -56.560142517089844, |
| "loss": 0.5552, |
| "losses/dpo": 0.47724148631095886, |
| "losses/sft": 1.4892723560333252, |
| "losses/total": 0.47724148631095886, |
| "ref_logps/chosen": -35.48023223876953, |
| "ref_logps/rejected": -43.66110610961914, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.8008283376693726, |
| "rewards/margins": 0.4890754222869873, |
| "rewards/rejected": -1.2899038791656494, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.11, |
| "grad_norm": 7.160682409155752, |
| "learning_rate": 1.6432584269662922e-07, |
| "logps/chosen": -44.21363830566406, |
| "logps/rejected": -58.09941864013672, |
| "loss": 0.5245, |
| "losses/dpo": 0.4419279396533966, |
| "losses/sft": 1.6377503871917725, |
| "losses/total": 0.4419279396533966, |
| "ref_logps/chosen": -36.17689514160156, |
| "ref_logps/rejected": -44.57625198364258, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8036742210388184, |
| "rewards/margins": 0.5486425757408142, |
| "rewards/rejected": -1.3523168563842773, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.11, |
| "grad_norm": 7.212324015356224, |
| "learning_rate": 1.6292134831460675e-07, |
| "logps/chosen": -44.50836181640625, |
| "logps/rejected": -51.84413528442383, |
| "loss": 0.5731, |
| "losses/dpo": 0.4934471547603607, |
| "losses/sft": 1.4699177742004395, |
| "losses/total": 0.4934471547603607, |
| "ref_logps/chosen": -36.38746643066406, |
| "ref_logps/rejected": -39.50672149658203, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.8120898604393005, |
| "rewards/margins": 0.4216514825820923, |
| "rewards/rejected": -1.233741283416748, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 6.532989304583127, |
| "learning_rate": 1.6151685393258428e-07, |
| "logps/chosen": -40.479827880859375, |
| "logps/rejected": -52.349693298339844, |
| "loss": 0.5111, |
| "losses/dpo": 0.5451053380966187, |
| "losses/sft": 1.5731171369552612, |
| "losses/total": 0.5451053380966187, |
| "ref_logps/chosen": -33.727699279785156, |
| "ref_logps/rejected": -40.0911865234375, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.6752126216888428, |
| "rewards/margins": 0.5506378412246704, |
| "rewards/rejected": -1.2258504629135132, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.13, |
| "grad_norm": 6.65644378559116, |
| "learning_rate": 1.6011235955056178e-07, |
| "logps/chosen": -42.37626266479492, |
| "logps/rejected": -53.92717742919922, |
| "loss": 0.5305, |
| "losses/dpo": 0.523646354675293, |
| "losses/sft": 1.5671043395996094, |
| "losses/total": 0.523646354675293, |
| "ref_logps/chosen": -35.16848373413086, |
| "ref_logps/rejected": -41.62503433227539, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.7207781076431274, |
| "rewards/margins": 0.509436309337616, |
| "rewards/rejected": -1.2302143573760986, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.14, |
| "grad_norm": 7.652354022803428, |
| "learning_rate": 1.5870786516853931e-07, |
| "logps/chosen": -45.83367919921875, |
| "logps/rejected": -57.50337219238281, |
| "loss": 0.5511, |
| "losses/dpo": 0.5657609105110168, |
| "losses/sft": 1.511309266090393, |
| "losses/total": 0.5657609105110168, |
| "ref_logps/chosen": -37.572113037109375, |
| "ref_logps/rejected": -44.38706970214844, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.8261568546295166, |
| "rewards/margins": 0.4854734539985657, |
| "rewards/rejected": -1.3116302490234375, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.14, |
| "grad_norm": 6.7860253446718, |
| "learning_rate": 1.5730337078651685e-07, |
| "logps/chosen": -41.25431823730469, |
| "logps/rejected": -54.90302658081055, |
| "loss": 0.5188, |
| "losses/dpo": 0.5078562498092651, |
| "losses/sft": 1.5500166416168213, |
| "losses/total": 0.5078562498092651, |
| "ref_logps/chosen": -33.7675666809082, |
| "ref_logps/rejected": -41.983367919921875, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.7486748695373535, |
| "rewards/margins": 0.5432910919189453, |
| "rewards/rejected": -1.2919659614562988, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.15, |
| "grad_norm": 7.11903493041396, |
| "learning_rate": 1.5589887640449438e-07, |
| "logps/chosen": -43.894989013671875, |
| "logps/rejected": -58.60367202758789, |
| "loss": 0.4914, |
| "losses/dpo": 0.5028943419456482, |
| "losses/sft": 1.594357967376709, |
| "losses/total": 0.5028943419456482, |
| "ref_logps/chosen": -36.30883026123047, |
| "ref_logps/rejected": -45.05226516723633, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -0.7586159706115723, |
| "rewards/margins": 0.5965246558189392, |
| "rewards/rejected": -1.3551405668258667, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 7.693954508671863, |
| "learning_rate": 1.5449438202247188e-07, |
| "logps/chosen": -48.08583068847656, |
| "logps/rejected": -53.51144027709961, |
| "loss": 0.5882, |
| "losses/dpo": 0.8339239954948425, |
| "losses/sft": 1.617476224899292, |
| "losses/total": 0.8339239954948425, |
| "ref_logps/chosen": -40.02094268798828, |
| "ref_logps/rejected": -41.421348571777344, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.8064886331558228, |
| "rewards/margins": 0.4025205969810486, |
| "rewards/rejected": -1.2090092897415161, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.17, |
| "grad_norm": 6.936991994103028, |
| "learning_rate": 1.5308988764044944e-07, |
| "logps/chosen": -42.915550231933594, |
| "logps/rejected": -54.570682525634766, |
| "loss": 0.5427, |
| "losses/dpo": 0.570111095905304, |
| "losses/sft": 1.7627439498901367, |
| "losses/total": 0.570111095905304, |
| "ref_logps/chosen": -34.83842468261719, |
| "ref_logps/rejected": -41.10365295410156, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.8077125549316406, |
| "rewards/margins": 0.5389906167984009, |
| "rewards/rejected": -1.346703052520752, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.17, |
| "grad_norm": 7.22270723761247, |
| "learning_rate": 1.5168539325842697e-07, |
| "logps/chosen": -42.23722457885742, |
| "logps/rejected": -57.404205322265625, |
| "loss": 0.529, |
| "losses/dpo": 0.5674354434013367, |
| "losses/sft": 1.5719692707061768, |
| "losses/total": 0.5674354434013367, |
| "ref_logps/chosen": -34.721920013427734, |
| "ref_logps/rejected": -44.73695373535156, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.7515305280685425, |
| "rewards/margins": 0.5151941180229187, |
| "rewards/rejected": -1.266724705696106, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.18, |
| "grad_norm": 8.53784031336331, |
| "learning_rate": 1.502808988764045e-07, |
| "logps/chosen": -48.22527313232422, |
| "logps/rejected": -58.22871398925781, |
| "loss": 0.5799, |
| "losses/dpo": 0.5890235900878906, |
| "losses/sft": 1.6156002283096313, |
| "losses/total": 0.5890235900878906, |
| "ref_logps/chosen": -39.35710525512695, |
| "ref_logps/rejected": -45.079322814941406, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.8868170976638794, |
| "rewards/margins": 0.4281224012374878, |
| "rewards/rejected": -1.3149394989013672, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.19, |
| "grad_norm": 6.885607484250047, |
| "learning_rate": 1.4887640449438203e-07, |
| "logps/chosen": -42.00331115722656, |
| "logps/rejected": -51.58038330078125, |
| "loss": 0.5568, |
| "losses/dpo": 0.602211058139801, |
| "losses/sft": 1.4960790872573853, |
| "losses/total": 0.602211058139801, |
| "ref_logps/chosen": -33.715057373046875, |
| "ref_logps/rejected": -39.065757751464844, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.8288247585296631, |
| "rewards/margins": 0.422637403011322, |
| "rewards/rejected": -1.2514622211456299, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 6.966987508866502, |
| "learning_rate": 1.4747191011235953e-07, |
| "logps/chosen": -43.61931228637695, |
| "logps/rejected": -58.451629638671875, |
| "loss": 0.5594, |
| "losses/dpo": 0.5309076309204102, |
| "losses/sft": 1.636415958404541, |
| "losses/total": 0.5309076309204102, |
| "ref_logps/chosen": -34.87947463989258, |
| "ref_logps/rejected": -44.885459899902344, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.87398362159729, |
| "rewards/margins": 0.48263317346572876, |
| "rewards/rejected": -1.356616735458374, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 6.8192638080381744, |
| "learning_rate": 1.4606741573033706e-07, |
| "logps/chosen": -43.92414855957031, |
| "logps/rejected": -53.39807891845703, |
| "loss": 0.5452, |
| "losses/dpo": 0.6017537713050842, |
| "losses/sft": 1.7611263990402222, |
| "losses/total": 0.6017537713050842, |
| "ref_logps/chosen": -35.702823638916016, |
| "ref_logps/rejected": -40.34605407714844, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.8221321702003479, |
| "rewards/margins": 0.4830705225467682, |
| "rewards/rejected": -1.3052027225494385, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.21, |
| "grad_norm": 8.710229032299473, |
| "learning_rate": 1.446629213483146e-07, |
| "logps/chosen": -51.8635368347168, |
| "logps/rejected": -58.34959030151367, |
| "loss": 0.601, |
| "losses/dpo": 0.5715082883834839, |
| "losses/sft": 1.490638017654419, |
| "losses/total": 0.5715082883834839, |
| "ref_logps/chosen": -42.414031982421875, |
| "ref_logps/rejected": -45.42848587036133, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.9449502229690552, |
| "rewards/margins": 0.3471601605415344, |
| "rewards/rejected": -1.2921103239059448, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.22, |
| "grad_norm": 7.6355542087700226, |
| "learning_rate": 1.4325842696629212e-07, |
| "logps/chosen": -43.83769607543945, |
| "logps/rejected": -58.36852264404297, |
| "loss": 0.546, |
| "losses/dpo": 0.4579807221889496, |
| "losses/sft": 1.5301527976989746, |
| "losses/total": 0.4579807221889496, |
| "ref_logps/chosen": -35.81403350830078, |
| "ref_logps/rejected": -44.5776252746582, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.802366316318512, |
| "rewards/margins": 0.5767236948013306, |
| "rewards/rejected": -1.3790900707244873, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.23, |
| "grad_norm": 7.4022075570091195, |
| "learning_rate": 1.4185393258426968e-07, |
| "logps/chosen": -44.79059600830078, |
| "logps/rejected": -59.63528060913086, |
| "loss": 0.5251, |
| "losses/dpo": 0.5625388622283936, |
| "losses/sft": 1.5417966842651367, |
| "losses/total": 0.5625388622283936, |
| "ref_logps/chosen": -36.72273254394531, |
| "ref_logps/rejected": -46.061744689941406, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.806786060333252, |
| "rewards/margins": 0.5505677461624146, |
| "rewards/rejected": -1.357353925704956, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.23, |
| "grad_norm": 7.092958234931924, |
| "learning_rate": 1.4044943820224718e-07, |
| "logps/chosen": -42.923343658447266, |
| "logps/rejected": -52.593894958496094, |
| "loss": 0.5582, |
| "losses/dpo": 0.46777036786079407, |
| "losses/sft": 1.5354235172271729, |
| "losses/total": 0.46777036786079407, |
| "ref_logps/chosen": -35.31480026245117, |
| "ref_logps/rejected": -40.524574279785156, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.7608542442321777, |
| "rewards/margins": 0.446077823638916, |
| "rewards/rejected": -1.2069320678710938, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 7.500648089064134, |
| "learning_rate": 1.3904494382022472e-07, |
| "logps/chosen": -43.400211334228516, |
| "logps/rejected": -54.485557556152344, |
| "loss": 0.5719, |
| "losses/dpo": 0.43427377939224243, |
| "losses/sft": 1.5346068143844604, |
| "losses/total": 0.43427377939224243, |
| "ref_logps/chosen": -35.220516204833984, |
| "ref_logps/rejected": -41.69098663330078, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.8179699182510376, |
| "rewards/margins": 0.4614875316619873, |
| "rewards/rejected": -1.2794575691223145, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 6.861133660639989, |
| "learning_rate": 1.3764044943820225e-07, |
| "logps/chosen": -40.74993896484375, |
| "logps/rejected": -55.73876190185547, |
| "loss": 0.5064, |
| "losses/dpo": 0.5779513716697693, |
| "losses/sft": 1.53359055519104, |
| "losses/total": 0.5779513716697693, |
| "ref_logps/chosen": -33.70279312133789, |
| "ref_logps/rejected": -42.673423767089844, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.7047147154808044, |
| "rewards/margins": 0.6018195152282715, |
| "rewards/rejected": -1.3065342903137207, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.26, |
| "grad_norm": 6.77015674340588, |
| "learning_rate": 1.3623595505617978e-07, |
| "logps/chosen": -41.57499694824219, |
| "logps/rejected": -55.820674896240234, |
| "loss": 0.5056, |
| "losses/dpo": 0.5236800909042358, |
| "losses/sft": 1.7500333786010742, |
| "losses/total": 0.5236800909042358, |
| "ref_logps/chosen": -34.20399475097656, |
| "ref_logps/rejected": -42.717681884765625, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.7371004819869995, |
| "rewards/margins": 0.5731986165046692, |
| "rewards/rejected": -1.3102991580963135, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.26, |
| "grad_norm": 6.520455497747794, |
| "learning_rate": 1.3483146067415728e-07, |
| "logps/chosen": -40.784889221191406, |
| "logps/rejected": -53.35670471191406, |
| "loss": 0.5158, |
| "losses/dpo": 0.39004355669021606, |
| "losses/sft": 1.4663935899734497, |
| "losses/total": 0.39004355669021606, |
| "ref_logps/chosen": -33.147525787353516, |
| "ref_logps/rejected": -39.950157165527344, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.7637366056442261, |
| "rewards/margins": 0.5769186019897461, |
| "rewards/rejected": -1.3406550884246826, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.27, |
| "grad_norm": 7.96122739673963, |
| "learning_rate": 1.334269662921348e-07, |
| "logps/chosen": -46.782169342041016, |
| "logps/rejected": -52.76530456542969, |
| "loss": 0.5923, |
| "losses/dpo": 0.6052607297897339, |
| "losses/sft": 1.6094651222229004, |
| "losses/total": 0.6052607297897339, |
| "ref_logps/chosen": -39.2327880859375, |
| "ref_logps/rejected": -41.223548889160156, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.754938006401062, |
| "rewards/margins": 0.3992377817630768, |
| "rewards/rejected": -1.1541757583618164, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.28, |
| "grad_norm": 7.751653637126333, |
| "learning_rate": 1.3202247191011234e-07, |
| "logps/chosen": -49.38646697998047, |
| "logps/rejected": -61.543209075927734, |
| "loss": 0.5327, |
| "losses/dpo": 0.556348443031311, |
| "losses/sft": 1.8087131977081299, |
| "losses/total": 0.556348443031311, |
| "ref_logps/chosen": -40.26612091064453, |
| "ref_logps/rejected": -47.002288818359375, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.9120345115661621, |
| "rewards/margins": 0.5420576930046082, |
| "rewards/rejected": -1.454092264175415, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.29, |
| "grad_norm": 7.798526029135885, |
| "learning_rate": 1.306179775280899e-07, |
| "logps/chosen": -43.65242004394531, |
| "logps/rejected": -57.450496673583984, |
| "loss": 0.5818, |
| "losses/dpo": 0.6229327321052551, |
| "losses/sft": 1.691450834274292, |
| "losses/total": 0.6229327321052551, |
| "ref_logps/chosen": -35.82228088378906, |
| "ref_logps/rejected": -45.80632781982422, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.7830138802528381, |
| "rewards/margins": 0.3814033269882202, |
| "rewards/rejected": -1.1644171476364136, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.29, |
| "grad_norm": 7.543077771323995, |
| "learning_rate": 1.2921348314606743e-07, |
| "logps/chosen": -44.790557861328125, |
| "logps/rejected": -61.33608627319336, |
| "loss": 0.5259, |
| "losses/dpo": 0.648471474647522, |
| "losses/sft": 1.673068881034851, |
| "losses/total": 0.648471474647522, |
| "ref_logps/chosen": -36.91156768798828, |
| "ref_logps/rejected": -48.083534240722656, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.7878991961479187, |
| "rewards/margins": 0.5373560190200806, |
| "rewards/rejected": -1.325255274772644, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 7.321395556756757, |
| "learning_rate": 1.2780898876404493e-07, |
| "logps/chosen": -45.843082427978516, |
| "logps/rejected": -57.27900695800781, |
| "loss": 0.5602, |
| "losses/dpo": 0.5154864192008972, |
| "losses/sft": 1.5874884128570557, |
| "losses/total": 0.5154864192008972, |
| "ref_logps/chosen": -37.167877197265625, |
| "ref_logps/rejected": -43.86834716796875, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.8675205707550049, |
| "rewards/margins": 0.4735449552536011, |
| "rewards/rejected": -1.341065526008606, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.31, |
| "grad_norm": 7.180779900116491, |
| "learning_rate": 1.2640449438202246e-07, |
| "logps/chosen": -45.17388153076172, |
| "logps/rejected": -55.727230072021484, |
| "loss": 0.5217, |
| "losses/dpo": 0.4565548598766327, |
| "losses/sft": 1.4454078674316406, |
| "losses/total": 0.4565548598766327, |
| "ref_logps/chosen": -37.53833770751953, |
| "ref_logps/rejected": -42.80052185058594, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.7635539770126343, |
| "rewards/margins": 0.529117226600647, |
| "rewards/rejected": -1.2926712036132812, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 7.748539788981976, |
| "learning_rate": 1.25e-07, |
| "logps/chosen": -45.61334991455078, |
| "logps/rejected": -49.54269790649414, |
| "loss": 0.5743, |
| "losses/dpo": 0.4475148916244507, |
| "losses/sft": 1.3761274814605713, |
| "losses/total": 0.4475148916244507, |
| "ref_logps/chosen": -37.51769256591797, |
| "ref_logps/rejected": -37.59453582763672, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.8095651865005493, |
| "rewards/margins": 0.38525110483169556, |
| "rewards/rejected": -1.1948162317276, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 7.42646045779241, |
| "learning_rate": 1.2359550561797752e-07, |
| "logps/chosen": -42.217491149902344, |
| "logps/rejected": -57.62702941894531, |
| "loss": 0.5103, |
| "losses/dpo": 0.5864957571029663, |
| "losses/sft": 1.526570439338684, |
| "losses/total": 0.5864957571029663, |
| "ref_logps/chosen": -34.16813659667969, |
| "ref_logps/rejected": -43.83499526977539, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.8049358129501343, |
| "rewards/margins": 0.5742676258087158, |
| "rewards/rejected": -1.37920343875885, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.33, |
| "grad_norm": 7.402951195988575, |
| "learning_rate": 1.2219101123595506e-07, |
| "logps/chosen": -43.753623962402344, |
| "logps/rejected": -55.725196838378906, |
| "loss": 0.5457, |
| "losses/dpo": 0.4583805501461029, |
| "losses/sft": 1.4125399589538574, |
| "losses/total": 0.4583805501461029, |
| "ref_logps/chosen": -35.22587585449219, |
| "ref_logps/rejected": -42.14164733886719, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.8527748584747314, |
| "rewards/margins": 0.5055804252624512, |
| "rewards/rejected": -1.3583552837371826, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.34, |
| "grad_norm": 8.606023903012021, |
| "learning_rate": 1.2078651685393259e-07, |
| "logps/chosen": -52.9200439453125, |
| "logps/rejected": -61.587310791015625, |
| "loss": 0.5909, |
| "losses/dpo": 0.5809124708175659, |
| "losses/sft": 1.585126280784607, |
| "losses/total": 0.5809124708175659, |
| "ref_logps/chosen": -43.860687255859375, |
| "ref_logps/rejected": -48.097747802734375, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.9059357047080994, |
| "rewards/margins": 0.443020224571228, |
| "rewards/rejected": -1.3489558696746826, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 7.302220410372284, |
| "learning_rate": 1.1938202247191012e-07, |
| "logps/chosen": -43.49970245361328, |
| "logps/rejected": -58.033485412597656, |
| "loss": 0.5186, |
| "losses/dpo": 0.48940473794937134, |
| "losses/sft": 1.4836596250534058, |
| "losses/total": 0.48940473794937134, |
| "ref_logps/chosen": -36.129432678222656, |
| "ref_logps/rejected": -44.55863952636719, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.7370268702507019, |
| "rewards/margins": 0.610457181930542, |
| "rewards/rejected": -1.3474839925765991, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 7.715132554841409, |
| "learning_rate": 1.1797752808988763e-07, |
| "logps/chosen": -45.6818733215332, |
| "logps/rejected": -57.750892639160156, |
| "loss": 0.5446, |
| "losses/dpo": 0.6576637625694275, |
| "losses/sft": 1.6136798858642578, |
| "losses/total": 0.6576637625694275, |
| "ref_logps/chosen": -37.14127731323242, |
| "ref_logps/rejected": -43.89785385131836, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.8540595769882202, |
| "rewards/margins": 0.5312445759773254, |
| "rewards/rejected": -1.3853040933609009, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 7.103504431576494, |
| "learning_rate": 1.1657303370786515e-07, |
| "logps/chosen": -43.971473693847656, |
| "logps/rejected": -57.290443420410156, |
| "loss": 0.5204, |
| "losses/dpo": 0.5861748456954956, |
| "losses/sft": 1.7284009456634521, |
| "losses/total": 0.5861748456954956, |
| "ref_logps/chosen": -35.535255432128906, |
| "ref_logps/rejected": -43.220550537109375, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.8436219096183777, |
| "rewards/margins": 0.5633664727210999, |
| "rewards/rejected": -1.4069883823394775, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.37, |
| "grad_norm": 7.74539036906925, |
| "learning_rate": 1.151685393258427e-07, |
| "logps/chosen": -45.654815673828125, |
| "logps/rejected": -55.697998046875, |
| "loss": 0.5716, |
| "losses/dpo": 0.6496266722679138, |
| "losses/sft": 1.7458603382110596, |
| "losses/total": 0.6496266722679138, |
| "ref_logps/chosen": -37.42692947387695, |
| "ref_logps/rejected": -43.09944534301758, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.8227887749671936, |
| "rewards/margins": 0.43706685304641724, |
| "rewards/rejected": -1.2598556280136108, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.38, |
| "grad_norm": 7.3016087270783965, |
| "learning_rate": 1.1376404494382023e-07, |
| "logps/chosen": -44.80577087402344, |
| "logps/rejected": -58.83177185058594, |
| "loss": 0.5616, |
| "losses/dpo": 0.5281144380569458, |
| "losses/sft": 1.5373191833496094, |
| "losses/total": 0.5281144380569458, |
| "ref_logps/chosen": -35.78607940673828, |
| "ref_logps/rejected": -44.85388946533203, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.9019690155982971, |
| "rewards/margins": 0.49581989645957947, |
| "rewards/rejected": -1.3977890014648438, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.38, |
| "grad_norm": 7.369973670185862, |
| "learning_rate": 1.1235955056179774e-07, |
| "logps/chosen": -44.48060607910156, |
| "logps/rejected": -57.20940399169922, |
| "loss": 0.5253, |
| "losses/dpo": 0.5681818723678589, |
| "losses/sft": 1.7861613035202026, |
| "losses/total": 0.5681818723678589, |
| "ref_logps/chosen": -35.98798751831055, |
| "ref_logps/rejected": -42.8939094543457, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.8492615818977356, |
| "rewards/margins": 0.5822880268096924, |
| "rewards/rejected": -1.4315495491027832, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.39, |
| "grad_norm": 7.531569588872603, |
| "learning_rate": 1.1095505617977527e-07, |
| "logps/chosen": -43.765594482421875, |
| "logps/rejected": -55.92811965942383, |
| "loss": 0.5324, |
| "losses/dpo": 0.5917935371398926, |
| "losses/sft": 1.6781896352767944, |
| "losses/total": 0.5917935371398926, |
| "ref_logps/chosen": -35.10169219970703, |
| "ref_logps/rejected": -42.32771301269531, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.8663901090621948, |
| "rewards/margins": 0.4936509132385254, |
| "rewards/rejected": -1.3600411415100098, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 7.00021473521157, |
| "learning_rate": 1.095505617977528e-07, |
| "logps/chosen": -43.7101936340332, |
| "logps/rejected": -55.512020111083984, |
| "loss": 0.5587, |
| "losses/dpo": 0.3991687297821045, |
| "losses/sft": 1.6147840023040771, |
| "losses/total": 0.3991687297821045, |
| "ref_logps/chosen": -34.230926513671875, |
| "ref_logps/rejected": -40.64677810668945, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.9479266405105591, |
| "rewards/margins": 0.5385974049568176, |
| "rewards/rejected": -1.4865241050720215, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.41, |
| "grad_norm": 7.422365442434664, |
| "learning_rate": 1.0814606741573033e-07, |
| "logps/chosen": -44.69245910644531, |
| "logps/rejected": -51.95305252075195, |
| "loss": 0.5704, |
| "losses/dpo": 0.5418112277984619, |
| "losses/sft": 1.3795506954193115, |
| "losses/total": 0.5418112277984619, |
| "ref_logps/chosen": -35.97312545776367, |
| "ref_logps/rejected": -39.2027702331543, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.8719329833984375, |
| "rewards/margins": 0.40309497714042664, |
| "rewards/rejected": -1.2750279903411865, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.42, |
| "grad_norm": 7.787952340155071, |
| "learning_rate": 1.0674157303370785e-07, |
| "logps/chosen": -46.54815673828125, |
| "logps/rejected": -55.3624153137207, |
| "loss": 0.5672, |
| "losses/dpo": 0.5258245468139648, |
| "losses/sft": 1.7972207069396973, |
| "losses/total": 0.5258245468139648, |
| "ref_logps/chosen": -38.29623031616211, |
| "ref_logps/rejected": -42.49595642089844, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.8251928687095642, |
| "rewards/margins": 0.4614531695842743, |
| "rewards/rejected": -1.2866460084915161, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.42, |
| "grad_norm": 7.716635082250954, |
| "learning_rate": 1.0533707865168538e-07, |
| "logps/chosen": -45.037723541259766, |
| "logps/rejected": -53.17112350463867, |
| "loss": 0.5781, |
| "losses/dpo": 0.7091802358627319, |
| "losses/sft": 1.6653159856796265, |
| "losses/total": 0.7091802358627319, |
| "ref_logps/chosen": -35.86992263793945, |
| "ref_logps/rejected": -39.99578857421875, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.9167801141738892, |
| "rewards/margins": 0.4007537364959717, |
| "rewards/rejected": -1.3175339698791504, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.43, |
| "grad_norm": 6.913928315105185, |
| "learning_rate": 1.0393258426966293e-07, |
| "logps/chosen": -46.575584411621094, |
| "logps/rejected": -59.92189407348633, |
| "loss": 0.4903, |
| "losses/dpo": 0.4884983003139496, |
| "losses/sft": 1.5409932136535645, |
| "losses/total": 0.4884983003139496, |
| "ref_logps/chosen": -39.07024383544922, |
| "ref_logps/rejected": -45.98902893066406, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.750534176826477, |
| "rewards/margins": 0.6427518725395203, |
| "rewards/rejected": -1.3932859897613525, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 7.359357410660962, |
| "learning_rate": 1.0252808988764044e-07, |
| "logps/chosen": -43.37224578857422, |
| "logps/rejected": -57.634010314941406, |
| "loss": 0.5156, |
| "losses/dpo": 0.4852214455604553, |
| "losses/sft": 1.7198714017868042, |
| "losses/total": 0.4852214455604553, |
| "ref_logps/chosen": -34.993377685546875, |
| "ref_logps/rejected": -43.477684020996094, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.8378866910934448, |
| "rewards/margins": 0.5777460336685181, |
| "rewards/rejected": -1.415632724761963, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 6.246986321807027, |
| "learning_rate": 1.0112359550561797e-07, |
| "logps/chosen": -39.65964889526367, |
| "logps/rejected": -53.03920364379883, |
| "loss": 0.4975, |
| "losses/dpo": 0.4038864076137543, |
| "losses/sft": 1.4372718334197998, |
| "losses/total": 0.4038864076137543, |
| "ref_logps/chosen": -32.10685348510742, |
| "ref_logps/rejected": -39.401954650878906, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.7552794218063354, |
| "rewards/margins": 0.6084451675415039, |
| "rewards/rejected": -1.3637245893478394, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 7.331178402164894, |
| "learning_rate": 9.971910112359549e-08, |
| "logps/chosen": -44.56993865966797, |
| "logps/rejected": -59.496734619140625, |
| "loss": 0.5273, |
| "losses/dpo": 0.4201672077178955, |
| "losses/sft": 1.5359259843826294, |
| "losses/total": 0.4201672077178955, |
| "ref_logps/chosen": -36.124656677246094, |
| "ref_logps/rejected": -45.465816497802734, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.8445284962654114, |
| "rewards/margins": 0.558563232421875, |
| "rewards/rejected": -1.4030916690826416, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.46, |
| "grad_norm": 8.06984724769854, |
| "learning_rate": 9.831460674157303e-08, |
| "logps/chosen": -48.639495849609375, |
| "logps/rejected": -56.76270294189453, |
| "loss": 0.5197, |
| "losses/dpo": 0.4748002588748932, |
| "losses/sft": 1.489260196685791, |
| "losses/total": 0.4748002588748932, |
| "ref_logps/chosen": -40.237266540527344, |
| "ref_logps/rejected": -42.580772399902344, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.8402228355407715, |
| "rewards/margins": 0.5779698491096497, |
| "rewards/rejected": -1.418192744255066, |
| "step": 326 |
| }, |
| { |
| "epoch": 2.47, |
| "grad_norm": 7.991421566802235, |
| "learning_rate": 9.691011235955055e-08, |
| "logps/chosen": -46.860626220703125, |
| "logps/rejected": -58.88548278808594, |
| "loss": 0.5235, |
| "losses/dpo": 0.6238963603973389, |
| "losses/sft": 1.7782843112945557, |
| "losses/total": 0.6238963603973389, |
| "ref_logps/chosen": -38.90311050415039, |
| "ref_logps/rejected": -45.0700569152832, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.7957516312599182, |
| "rewards/margins": 0.5857904553413391, |
| "rewards/rejected": -1.3815419673919678, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 7.266548128858226, |
| "learning_rate": 9.550561797752808e-08, |
| "logps/chosen": -42.59172821044922, |
| "logps/rejected": -52.10871505737305, |
| "loss": 0.5667, |
| "losses/dpo": 0.6280735731124878, |
| "losses/sft": 1.5307084321975708, |
| "losses/total": 0.6280735731124878, |
| "ref_logps/chosen": -34.418861389160156, |
| "ref_logps/rejected": -39.33207702636719, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.8172866106033325, |
| "rewards/margins": 0.46037718653678894, |
| "rewards/rejected": -1.2776637077331543, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 7.642236003437132, |
| "learning_rate": 9.410112359550561e-08, |
| "logps/chosen": -45.72086715698242, |
| "logps/rejected": -52.531341552734375, |
| "loss": 0.5663, |
| "losses/dpo": 0.5168911814689636, |
| "losses/sft": 1.7978273630142212, |
| "losses/total": 0.5168911814689636, |
| "ref_logps/chosen": -37.207481384277344, |
| "ref_logps/rejected": -39.487640380859375, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.851338267326355, |
| "rewards/margins": 0.4530315101146698, |
| "rewards/rejected": -1.3043696880340576, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.49, |
| "grad_norm": 7.854817727198668, |
| "learning_rate": 9.269662921348314e-08, |
| "logps/chosen": -46.91447448730469, |
| "logps/rejected": -57.34621810913086, |
| "loss": 0.5504, |
| "losses/dpo": 0.5696989297866821, |
| "losses/sft": 1.708069086074829, |
| "losses/total": 0.5696989297866821, |
| "ref_logps/chosen": -37.96686553955078, |
| "ref_logps/rejected": -43.223960876464844, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.894761323928833, |
| "rewards/margins": 0.5174643993377686, |
| "rewards/rejected": -1.4122257232666016, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 7.286496155272333, |
| "learning_rate": 9.129213483146067e-08, |
| "logps/chosen": -44.248069763183594, |
| "logps/rejected": -60.32553482055664, |
| "loss": 0.5098, |
| "losses/dpo": 0.6059004664421082, |
| "losses/sft": 1.61500883102417, |
| "losses/total": 0.6059004664421082, |
| "ref_logps/chosen": -35.856903076171875, |
| "ref_logps/rejected": -46.063690185546875, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.839116632938385, |
| "rewards/margins": 0.5870682001113892, |
| "rewards/rejected": -1.426184892654419, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.51, |
| "grad_norm": 7.873630188834188, |
| "learning_rate": 8.988764044943819e-08, |
| "logps/chosen": -47.52843475341797, |
| "logps/rejected": -58.742042541503906, |
| "loss": 0.5465, |
| "losses/dpo": 0.44472765922546387, |
| "losses/sft": 1.8056182861328125, |
| "losses/total": 0.44472765922546387, |
| "ref_logps/chosen": -37.702247619628906, |
| "ref_logps/rejected": -43.76355743408203, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.982619047164917, |
| "rewards/margins": 0.5152289867401123, |
| "rewards/rejected": -1.4978480339050293, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.51, |
| "grad_norm": 7.654569968967968, |
| "learning_rate": 8.848314606741572e-08, |
| "logps/chosen": -45.35044860839844, |
| "logps/rejected": -54.683128356933594, |
| "loss": 0.5477, |
| "losses/dpo": 0.6351585388183594, |
| "losses/sft": 1.465951681137085, |
| "losses/total": 0.6351585388183594, |
| "ref_logps/chosen": -37.354312896728516, |
| "ref_logps/rejected": -41.636627197265625, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.7996135354042053, |
| "rewards/margins": 0.505035936832428, |
| "rewards/rejected": -1.3046493530273438, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 7.093489997148873, |
| "learning_rate": 8.707865168539325e-08, |
| "logps/chosen": -44.989524841308594, |
| "logps/rejected": -56.54049301147461, |
| "loss": 0.5235, |
| "losses/dpo": 0.6136016845703125, |
| "losses/sft": 1.876564860343933, |
| "losses/total": 0.6136016845703125, |
| "ref_logps/chosen": -36.00385284423828, |
| "ref_logps/rejected": -41.88980484008789, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.8985673785209656, |
| "rewards/margins": 0.566501259803772, |
| "rewards/rejected": -1.4650685787200928, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.53, |
| "grad_norm": 7.6289229336667, |
| "learning_rate": 8.567415730337078e-08, |
| "logps/chosen": -45.615413665771484, |
| "logps/rejected": -56.45619201660156, |
| "loss": 0.5586, |
| "losses/dpo": 0.5084734559059143, |
| "losses/sft": 1.6048380136489868, |
| "losses/total": 0.5084734559059143, |
| "ref_logps/chosen": -36.61585998535156, |
| "ref_logps/rejected": -42.79827880859375, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.8999553322792053, |
| "rewards/margins": 0.4658358097076416, |
| "rewards/rejected": -1.3657910823822021, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.54, |
| "grad_norm": 8.275598752517682, |
| "learning_rate": 8.426966292134831e-08, |
| "logps/chosen": -47.839508056640625, |
| "logps/rejected": -61.7794303894043, |
| "loss": 0.5368, |
| "losses/dpo": 0.5232934355735779, |
| "losses/sft": 1.4998161792755127, |
| "losses/total": 0.5232934355735779, |
| "ref_logps/chosen": -38.512969970703125, |
| "ref_logps/rejected": -46.98280334472656, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.9326539039611816, |
| "rewards/margins": 0.5470089316368103, |
| "rewards/rejected": -1.4796628952026367, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.54, |
| "grad_norm": 6.837098147362294, |
| "learning_rate": 8.286516853932583e-08, |
| "logps/chosen": -42.03435516357422, |
| "logps/rejected": -58.38957214355469, |
| "loss": 0.4918, |
| "losses/dpo": 0.34719789028167725, |
| "losses/sft": 1.4158234596252441, |
| "losses/total": 0.34719789028167725, |
| "ref_logps/chosen": -34.547210693359375, |
| "ref_logps/rejected": -44.05992889404297, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.7487142086029053, |
| "rewards/margins": 0.6842500567436218, |
| "rewards/rejected": -1.4329640865325928, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.55, |
| "grad_norm": 8.253144412756335, |
| "learning_rate": 8.146067415730337e-08, |
| "logps/chosen": -45.23094940185547, |
| "logps/rejected": -53.472965240478516, |
| "loss": 0.595, |
| "losses/dpo": 0.5666919350624084, |
| "losses/sft": 1.5198816061019897, |
| "losses/total": 0.5666919350624084, |
| "ref_logps/chosen": -36.06470489501953, |
| "ref_logps/rejected": -40.16011047363281, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.9166238903999329, |
| "rewards/margins": 0.4146617650985718, |
| "rewards/rejected": -1.3312857151031494, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 8.149761017487126, |
| "learning_rate": 8.005617977528089e-08, |
| "logps/chosen": -45.32318115234375, |
| "logps/rejected": -51.304725646972656, |
| "loss": 0.6056, |
| "losses/dpo": 0.466902494430542, |
| "losses/sft": 1.4729348421096802, |
| "losses/total": 0.466902494430542, |
| "ref_logps/chosen": -35.85576248168945, |
| "ref_logps/rejected": -37.858970642089844, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.9467417597770691, |
| "rewards/margins": 0.39783352613449097, |
| "rewards/rejected": -1.34457528591156, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.57, |
| "grad_norm": 7.054066859896987, |
| "learning_rate": 7.865168539325842e-08, |
| "logps/chosen": -45.38795471191406, |
| "logps/rejected": -57.93950653076172, |
| "loss": 0.5182, |
| "losses/dpo": 0.4842032194137573, |
| "losses/sft": 1.6942293643951416, |
| "losses/total": 0.4842032194137573, |
| "ref_logps/chosen": -36.484046936035156, |
| "ref_logps/rejected": -43.67852783203125, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.8903906345367432, |
| "rewards/margins": 0.5357075333595276, |
| "rewards/rejected": -1.426098108291626, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.57, |
| "grad_norm": 7.258837050647915, |
| "learning_rate": 7.724719101123594e-08, |
| "logps/chosen": -46.00672149658203, |
| "logps/rejected": -59.08924865722656, |
| "loss": 0.5373, |
| "losses/dpo": 0.5424889326095581, |
| "losses/sft": 1.6475489139556885, |
| "losses/total": 0.5424889326095581, |
| "ref_logps/chosen": -37.73750305175781, |
| "ref_logps/rejected": -45.23866271972656, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.826921820640564, |
| "rewards/margins": 0.5581368207931519, |
| "rewards/rejected": -1.3850586414337158, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.58, |
| "grad_norm": 7.988126049073018, |
| "learning_rate": 7.584269662921348e-08, |
| "logps/chosen": -46.84196472167969, |
| "logps/rejected": -55.757198333740234, |
| "loss": 0.5662, |
| "losses/dpo": 0.353384792804718, |
| "losses/sft": 1.717570424079895, |
| "losses/total": 0.353384792804718, |
| "ref_logps/chosen": -37.82433319091797, |
| "ref_logps/rejected": -42.26597213745117, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9017627835273743, |
| "rewards/margins": 0.44735997915267944, |
| "rewards/rejected": -1.3491227626800537, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.59, |
| "grad_norm": 7.618340962447428, |
| "learning_rate": 7.443820224719101e-08, |
| "logps/chosen": -43.753684997558594, |
| "logps/rejected": -55.105316162109375, |
| "loss": 0.546, |
| "losses/dpo": 0.6419227123260498, |
| "losses/sft": 1.6892149448394775, |
| "losses/total": 0.6419227123260498, |
| "ref_logps/chosen": -35.38850402832031, |
| "ref_logps/rejected": -41.928646087646484, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -0.8365182876586914, |
| "rewards/margins": 0.4811485707759857, |
| "rewards/rejected": -1.3176668882369995, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 7.5691903171304915, |
| "learning_rate": 7.303370786516853e-08, |
| "logps/chosen": -44.20778274536133, |
| "logps/rejected": -55.97998046875, |
| "loss": 0.5407, |
| "losses/dpo": 0.5625724196434021, |
| "losses/sft": 1.5753792524337769, |
| "losses/total": 0.5625724196434021, |
| "ref_logps/chosen": -35.950294494628906, |
| "ref_logps/rejected": -42.38732147216797, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.8257489204406738, |
| "rewards/margins": 0.5335172414779663, |
| "rewards/rejected": -1.3592660427093506, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 7.013411401019271, |
| "learning_rate": 7.162921348314606e-08, |
| "logps/chosen": -48.60981750488281, |
| "logps/rejected": -61.60570526123047, |
| "loss": 0.4779, |
| "losses/dpo": 0.5409685373306274, |
| "losses/sft": 1.6795134544372559, |
| "losses/total": 0.5409685373306274, |
| "ref_logps/chosen": -39.66438293457031, |
| "ref_logps/rejected": -45.88689422607422, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.8945437073707581, |
| "rewards/margins": 0.677337646484375, |
| "rewards/rejected": -1.5718812942504883, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.61, |
| "grad_norm": 7.235335525204532, |
| "learning_rate": 7.022471910112359e-08, |
| "logps/chosen": -40.0158805847168, |
| "logps/rejected": -53.02748107910156, |
| "loss": 0.5321, |
| "losses/dpo": 0.5608981847763062, |
| "losses/sft": 1.2928898334503174, |
| "losses/total": 0.5608981847763062, |
| "ref_logps/chosen": -32.48070526123047, |
| "ref_logps/rejected": -40.139122009277344, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.7535171508789062, |
| "rewards/margins": 0.5353185534477234, |
| "rewards/rejected": -1.2888355255126953, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.62, |
| "grad_norm": 7.561289554463479, |
| "learning_rate": 6.882022471910112e-08, |
| "logps/chosen": -45.8831787109375, |
| "logps/rejected": -52.74605178833008, |
| "loss": 0.5634, |
| "losses/dpo": 0.5234625935554504, |
| "losses/sft": 1.5698529481887817, |
| "losses/total": 0.5234625935554504, |
| "ref_logps/chosen": -36.858985900878906, |
| "ref_logps/rejected": -39.08251953125, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.9024193286895752, |
| "rewards/margins": 0.4639340043067932, |
| "rewards/rejected": -1.3663533926010132, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.63, |
| "grad_norm": 7.714313510104845, |
| "learning_rate": 6.741573033707864e-08, |
| "logps/chosen": -47.23927307128906, |
| "logps/rejected": -56.10950469970703, |
| "loss": 0.5513, |
| "losses/dpo": 0.5834592580795288, |
| "losses/sft": 1.8191860914230347, |
| "losses/total": 0.5834592580795288, |
| "ref_logps/chosen": -38.12981414794922, |
| "ref_logps/rejected": -41.886940002441406, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.9109456539154053, |
| "rewards/margins": 0.5113106966018677, |
| "rewards/rejected": -1.422256350517273, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.63, |
| "grad_norm": 7.158492594820948, |
| "learning_rate": 6.601123595505617e-08, |
| "logps/chosen": -48.13493347167969, |
| "logps/rejected": -60.77044677734375, |
| "loss": 0.4977, |
| "losses/dpo": 0.36096107959747314, |
| "losses/sft": 1.417677640914917, |
| "losses/total": 0.36096107959747314, |
| "ref_logps/chosen": -39.71112823486328, |
| "ref_logps/rejected": -45.55910110473633, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -0.8423808813095093, |
| "rewards/margins": 0.6787533164024353, |
| "rewards/rejected": -1.5211341381072998, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 7.665270223156107, |
| "learning_rate": 6.460674157303371e-08, |
| "logps/chosen": -45.67338562011719, |
| "logps/rejected": -52.81538391113281, |
| "loss": 0.5593, |
| "losses/dpo": 0.5008495450019836, |
| "losses/sft": 1.4033509492874146, |
| "losses/total": 0.5008495450019836, |
| "ref_logps/chosen": -37.30530548095703, |
| "ref_logps/rejected": -39.90568542480469, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.8368085026741028, |
| "rewards/margins": 0.45416122674942017, |
| "rewards/rejected": -1.290969729423523, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.65, |
| "grad_norm": 7.472808082581494, |
| "learning_rate": 6.320224719101123e-08, |
| "logps/chosen": -43.985435485839844, |
| "logps/rejected": -57.295692443847656, |
| "loss": 0.5187, |
| "losses/dpo": 0.5680770874023438, |
| "losses/sft": 1.4148482084274292, |
| "losses/total": 0.5680770874023438, |
| "ref_logps/chosen": -36.4133186340332, |
| "ref_logps/rejected": -43.849090576171875, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.757211446762085, |
| "rewards/margins": 0.5874490737915039, |
| "rewards/rejected": -1.3446605205535889, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.66, |
| "grad_norm": 8.189112257010201, |
| "learning_rate": 6.179775280898876e-08, |
| "logps/chosen": -47.502281188964844, |
| "logps/rejected": -54.84540939331055, |
| "loss": 0.583, |
| "losses/dpo": 0.5579338073730469, |
| "losses/sft": 1.615804672241211, |
| "losses/total": 0.5579338073730469, |
| "ref_logps/chosen": -38.33686065673828, |
| "ref_logps/rejected": -41.62626647949219, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.9165424108505249, |
| "rewards/margins": 0.40537166595458984, |
| "rewards/rejected": -1.3219139575958252, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.66, |
| "grad_norm": 7.805410708655585, |
| "learning_rate": 6.039325842696629e-08, |
| "logps/chosen": -44.361324310302734, |
| "logps/rejected": -59.17631149291992, |
| "loss": 0.5472, |
| "losses/dpo": 0.6015689373016357, |
| "losses/sft": 1.6676236391067505, |
| "losses/total": 0.6015689373016357, |
| "ref_logps/chosen": -35.77091979980469, |
| "ref_logps/rejected": -45.55202865600586, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.8590403199195862, |
| "rewards/margins": 0.5033884048461914, |
| "rewards/rejected": -1.362428903579712, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.67, |
| "grad_norm": 8.028184259224918, |
| "learning_rate": 5.898876404494382e-08, |
| "logps/chosen": -46.5517463684082, |
| "logps/rejected": -56.04482650756836, |
| "loss": 0.5542, |
| "losses/dpo": 0.6546050310134888, |
| "losses/sft": 1.504585862159729, |
| "losses/total": 0.6546050310134888, |
| "ref_logps/chosen": -37.785274505615234, |
| "ref_logps/rejected": -42.51100540161133, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.8766471147537231, |
| "rewards/margins": 0.47673481702804565, |
| "rewards/rejected": -1.3533821105957031, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 7.260620206463691, |
| "learning_rate": 5.758426966292135e-08, |
| "logps/chosen": -48.80982971191406, |
| "logps/rejected": -56.19672393798828, |
| "loss": 0.5515, |
| "losses/dpo": 0.46307122707366943, |
| "losses/sft": 1.685928463935852, |
| "losses/total": 0.46307122707366943, |
| "ref_logps/chosen": -40.1851806640625, |
| "ref_logps/rejected": -43.02751159667969, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.8624651432037354, |
| "rewards/margins": 0.4544559121131897, |
| "rewards/rejected": -1.3169212341308594, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.69, |
| "grad_norm": 7.136349320311039, |
| "learning_rate": 5.617977528089887e-08, |
| "logps/chosen": -41.40632629394531, |
| "logps/rejected": -54.12514114379883, |
| "loss": 0.5343, |
| "losses/dpo": 0.45047110319137573, |
| "losses/sft": 1.3219261169433594, |
| "losses/total": 0.45047110319137573, |
| "ref_logps/chosen": -33.34068298339844, |
| "ref_logps/rejected": -40.50140380859375, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.8065648078918457, |
| "rewards/margins": 0.555808961391449, |
| "rewards/rejected": -1.3623738288879395, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.69, |
| "grad_norm": 7.456298216594317, |
| "learning_rate": 5.47752808988764e-08, |
| "logps/chosen": -44.206722259521484, |
| "logps/rejected": -55.71735382080078, |
| "loss": 0.5494, |
| "losses/dpo": 0.4734205901622772, |
| "losses/sft": 1.4844837188720703, |
| "losses/total": 0.4734205901622772, |
| "ref_logps/chosen": -35.47336959838867, |
| "ref_logps/rejected": -41.92726516723633, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.8733350038528442, |
| "rewards/margins": 0.5056736469268799, |
| "rewards/rejected": -1.3790085315704346, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 7.41987426694341, |
| "learning_rate": 5.3370786516853926e-08, |
| "logps/chosen": -46.22618865966797, |
| "logps/rejected": -56.47550964355469, |
| "loss": 0.5003, |
| "losses/dpo": 0.562317430973053, |
| "losses/sft": 1.491492509841919, |
| "losses/total": 0.562317430973053, |
| "ref_logps/chosen": -37.904022216796875, |
| "ref_logps/rejected": -41.78309631347656, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -0.8322172164916992, |
| "rewards/margins": 0.6370242834091187, |
| "rewards/rejected": -1.4692414999008179, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.71, |
| "grad_norm": 6.765690296642083, |
| "learning_rate": 5.196629213483146e-08, |
| "logps/chosen": -41.32649612426758, |
| "logps/rejected": -55.117488861083984, |
| "loss": 0.4906, |
| "losses/dpo": 0.45937132835388184, |
| "losses/sft": 1.3386218547821045, |
| "losses/total": 0.45937132835388184, |
| "ref_logps/chosen": -33.87388610839844, |
| "ref_logps/rejected": -41.34483337402344, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.7452608942985535, |
| "rewards/margins": 0.6320046782493591, |
| "rewards/rejected": -1.377265453338623, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.72, |
| "grad_norm": 7.95832621655637, |
| "learning_rate": 5.056179775280899e-08, |
| "logps/chosen": -44.83673858642578, |
| "logps/rejected": -54.593666076660156, |
| "loss": 0.5529, |
| "losses/dpo": 0.5646368861198425, |
| "losses/sft": 1.3903212547302246, |
| "losses/total": 0.5646368861198425, |
| "ref_logps/chosen": -36.184173583984375, |
| "ref_logps/rejected": -41.40753173828125, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.865256667137146, |
| "rewards/margins": 0.45335638523101807, |
| "rewards/rejected": -1.318613052368164, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.72, |
| "grad_norm": 8.359120516602266, |
| "learning_rate": 4.915730337078652e-08, |
| "logps/chosen": -48.39961624145508, |
| "logps/rejected": -54.2540397644043, |
| "loss": 0.5967, |
| "losses/dpo": 0.7173389196395874, |
| "losses/sft": 1.989745020866394, |
| "losses/total": 0.7173389196395874, |
| "ref_logps/chosen": -39.236839294433594, |
| "ref_logps/rejected": -40.71720504760742, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.9162774682044983, |
| "rewards/margins": 0.4374057650566101, |
| "rewards/rejected": -1.3536832332611084, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.73, |
| "grad_norm": 7.8949116422203645, |
| "learning_rate": 4.775280898876404e-08, |
| "logps/chosen": -45.1904296875, |
| "logps/rejected": -55.9586181640625, |
| "loss": 0.5313, |
| "losses/dpo": 0.6307837963104248, |
| "losses/sft": 1.725508213043213, |
| "losses/total": 0.6307837963104248, |
| "ref_logps/chosen": -36.340023040771484, |
| "ref_logps/rejected": -41.507598876953125, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -0.8850406408309937, |
| "rewards/margins": 0.5600608587265015, |
| "rewards/rejected": -1.4451014995574951, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.74, |
| "grad_norm": 7.439921856307659, |
| "learning_rate": 4.634831460674157e-08, |
| "logps/chosen": -47.467689514160156, |
| "logps/rejected": -55.250770568847656, |
| "loss": 0.5418, |
| "losses/dpo": 0.6079765558242798, |
| "losses/sft": 1.8188178539276123, |
| "losses/total": 0.6079765558242798, |
| "ref_logps/chosen": -38.65827560424805, |
| "ref_logps/rejected": -41.087459564208984, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.8809411525726318, |
| "rewards/margins": 0.53538978099823, |
| "rewards/rejected": -1.4163308143615723, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 7.250730071839225, |
| "learning_rate": 4.4943820224719096e-08, |
| "logps/chosen": -42.77532196044922, |
| "logps/rejected": -59.660240173339844, |
| "loss": 0.4573, |
| "losses/dpo": 0.4384981393814087, |
| "losses/sft": 1.4787318706512451, |
| "losses/total": 0.4384981393814087, |
| "ref_logps/chosen": -35.051734924316406, |
| "ref_logps/rejected": -44.746734619140625, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -0.7723584175109863, |
| "rewards/margins": 0.7189919948577881, |
| "rewards/rejected": -1.4913504123687744, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 8.23044878029811, |
| "learning_rate": 4.3539325842696626e-08, |
| "logps/chosen": -49.66007995605469, |
| "logps/rejected": -60.10342025756836, |
| "loss": 0.5469, |
| "losses/dpo": 0.5084363222122192, |
| "losses/sft": 1.8791687488555908, |
| "losses/total": 0.5084363222122192, |
| "ref_logps/chosen": -40.346317291259766, |
| "ref_logps/rejected": -45.41026306152344, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.9313763380050659, |
| "rewards/margins": 0.5379395484924316, |
| "rewards/rejected": -1.469315767288208, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 7.298326331639276, |
| "learning_rate": 4.213483146067416e-08, |
| "logps/chosen": -48.57288360595703, |
| "logps/rejected": -57.29835510253906, |
| "loss": 0.5395, |
| "losses/dpo": 0.44340649247169495, |
| "losses/sft": 1.5243843793869019, |
| "losses/total": 0.44340649247169495, |
| "ref_logps/chosen": -39.81139373779297, |
| "ref_logps/rejected": -43.033912658691406, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.8761484622955322, |
| "rewards/margins": 0.5502957701683044, |
| "rewards/rejected": -1.4264442920684814, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.77, |
| "grad_norm": 8.51804253270616, |
| "learning_rate": 4.073033707865169e-08, |
| "logps/chosen": -44.41961669921875, |
| "logps/rejected": -53.89155578613281, |
| "loss": 0.5723, |
| "losses/dpo": 0.5301268100738525, |
| "losses/sft": 1.8131489753723145, |
| "losses/total": 0.5301268100738525, |
| "ref_logps/chosen": -35.50189971923828, |
| "ref_logps/rejected": -40.51585388183594, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8917717933654785, |
| "rewards/margins": 0.44579851627349854, |
| "rewards/rejected": -1.337570309638977, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.78, |
| "grad_norm": 7.54105725557247, |
| "learning_rate": 3.932584269662921e-08, |
| "logps/chosen": -41.12848663330078, |
| "logps/rejected": -55.290313720703125, |
| "loss": 0.54, |
| "losses/dpo": 0.49226510524749756, |
| "losses/sft": 1.37047278881073, |
| "losses/total": 0.49226510524749756, |
| "ref_logps/chosen": -32.751502990722656, |
| "ref_logps/rejected": -41.476318359375, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8376982808113098, |
| "rewards/margins": 0.5437013506889343, |
| "rewards/rejected": -1.3813996315002441, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.78, |
| "grad_norm": 7.706900297271427, |
| "learning_rate": 3.792134831460674e-08, |
| "logps/chosen": -45.84465408325195, |
| "logps/rejected": -56.17218780517578, |
| "loss": 0.5373, |
| "losses/dpo": 0.5797220468521118, |
| "losses/sft": 1.6374412775039673, |
| "losses/total": 0.5797220468521118, |
| "ref_logps/chosen": -36.70783233642578, |
| "ref_logps/rejected": -41.77714538574219, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.913682222366333, |
| "rewards/margins": 0.5258220434188843, |
| "rewards/rejected": -1.4395041465759277, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.79, |
| "grad_norm": 7.7547810769646555, |
| "learning_rate": 3.6516853932584266e-08, |
| "logps/chosen": -42.759098052978516, |
| "logps/rejected": -52.87897491455078, |
| "loss": 0.5836, |
| "losses/dpo": 0.7289267778396606, |
| "losses/sft": 1.7013481855392456, |
| "losses/total": 0.7289267778396606, |
| "ref_logps/chosen": -34.204769134521484, |
| "ref_logps/rejected": -40.38142776489258, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.85543292760849, |
| "rewards/margins": 0.3943214416503906, |
| "rewards/rejected": -1.2497543096542358, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 7.07250671481464, |
| "learning_rate": 3.5112359550561796e-08, |
| "logps/chosen": -45.09293746948242, |
| "logps/rejected": -56.41200256347656, |
| "loss": 0.5025, |
| "losses/dpo": 0.42966747283935547, |
| "losses/sft": 1.5621216297149658, |
| "losses/total": 0.42966747283935547, |
| "ref_logps/chosen": -37.37934494018555, |
| "ref_logps/rejected": -42.70643615722656, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.7713593244552612, |
| "rewards/margins": 0.599197506904602, |
| "rewards/rejected": -1.3705568313598633, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.81, |
| "grad_norm": 8.204140371424504, |
| "learning_rate": 3.370786516853932e-08, |
| "logps/chosen": -48.024269104003906, |
| "logps/rejected": -57.5866584777832, |
| "loss": 0.5389, |
| "losses/dpo": 0.5919984579086304, |
| "losses/sft": 1.4933536052703857, |
| "losses/total": 0.5919984579086304, |
| "ref_logps/chosen": -39.3967399597168, |
| "ref_logps/rejected": -43.82619857788086, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.8627532124519348, |
| "rewards/margins": 0.5132932662963867, |
| "rewards/rejected": -1.3760464191436768, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.82, |
| "grad_norm": 8.113226187403898, |
| "learning_rate": 3.230337078651686e-08, |
| "logps/chosen": -46.628257751464844, |
| "logps/rejected": -62.4483642578125, |
| "loss": 0.4997, |
| "losses/dpo": 0.4237878918647766, |
| "losses/sft": 1.5488381385803223, |
| "losses/total": 0.4237878918647766, |
| "ref_logps/chosen": -38.190887451171875, |
| "ref_logps/rejected": -47.450843811035156, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.8437370657920837, |
| "rewards/margins": 0.6560153961181641, |
| "rewards/rejected": -1.4997525215148926, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.82, |
| "grad_norm": 7.437009897432824, |
| "learning_rate": 3.089887640449438e-08, |
| "logps/chosen": -44.36549377441406, |
| "logps/rejected": -59.504005432128906, |
| "loss": 0.4967, |
| "losses/dpo": 0.42525550723075867, |
| "losses/sft": 1.5591559410095215, |
| "losses/total": 0.42525550723075867, |
| "ref_logps/chosen": -35.93299102783203, |
| "ref_logps/rejected": -44.88318634033203, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.8432497978210449, |
| "rewards/margins": 0.6188317537307739, |
| "rewards/rejected": -1.4620814323425293, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.83, |
| "grad_norm": 6.665463460188975, |
| "learning_rate": 2.949438202247191e-08, |
| "logps/chosen": -42.69816970825195, |
| "logps/rejected": -59.00178909301758, |
| "loss": 0.4858, |
| "losses/dpo": 0.40338996052742004, |
| "losses/sft": 1.7176090478897095, |
| "losses/total": 0.40338996052742004, |
| "ref_logps/chosen": -34.84870147705078, |
| "ref_logps/rejected": -44.43999481201172, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -0.7849469184875488, |
| "rewards/margins": 0.6712321639060974, |
| "rewards/rejected": -1.456179141998291, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 7.288923139558504, |
| "learning_rate": 2.8089887640449436e-08, |
| "logps/chosen": -47.2071533203125, |
| "logps/rejected": -58.098148345947266, |
| "loss": 0.5076, |
| "losses/dpo": 0.5357474088668823, |
| "losses/sft": 1.654085636138916, |
| "losses/total": 0.5357474088668823, |
| "ref_logps/chosen": -38.56999588012695, |
| "ref_logps/rejected": -43.37090301513672, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -0.8637155890464783, |
| "rewards/margins": 0.6090089678764343, |
| "rewards/rejected": -1.4727245569229126, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 7.551625004814956, |
| "learning_rate": 2.6685393258426963e-08, |
| "logps/chosen": -45.72412109375, |
| "logps/rejected": -56.43421173095703, |
| "loss": 0.5474, |
| "losses/dpo": 0.544715404510498, |
| "losses/sft": 1.5618551969528198, |
| "losses/total": 0.544715404510498, |
| "ref_logps/chosen": -37.27866744995117, |
| "ref_logps/rejected": -42.84328079223633, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8445456027984619, |
| "rewards/margins": 0.5145478248596191, |
| "rewards/rejected": -1.359093427658081, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 8.025864212794117, |
| "learning_rate": 2.5280898876404493e-08, |
| "logps/chosen": -45.621158599853516, |
| "logps/rejected": -60.68471145629883, |
| "loss": 0.5285, |
| "losses/dpo": 0.5538164377212524, |
| "losses/sft": 1.5718330144882202, |
| "losses/total": 0.5538164377212524, |
| "ref_logps/chosen": -36.82067108154297, |
| "ref_logps/rejected": -46.61594009399414, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.8800492286682129, |
| "rewards/margins": 0.5268282890319824, |
| "rewards/rejected": -1.4068775177001953, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.86, |
| "grad_norm": 7.047129440366168, |
| "learning_rate": 2.387640449438202e-08, |
| "logps/chosen": -44.4951057434082, |
| "logps/rejected": -50.4869499206543, |
| "loss": 0.549, |
| "losses/dpo": 0.490747332572937, |
| "losses/sft": 1.6444151401519775, |
| "losses/total": 0.490747332572937, |
| "ref_logps/chosen": -36.696903228759766, |
| "ref_logps/rejected": -37.55984878540039, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.7798205614089966, |
| "rewards/margins": 0.5128894448280334, |
| "rewards/rejected": -1.2927099466323853, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.87, |
| "grad_norm": 7.471899506757266, |
| "learning_rate": 2.2471910112359548e-08, |
| "logps/chosen": -47.25148391723633, |
| "logps/rejected": -59.717864990234375, |
| "loss": 0.5358, |
| "losses/dpo": 0.6100134253501892, |
| "losses/sft": 1.9196665287017822, |
| "losses/total": 0.6100134253501892, |
| "ref_logps/chosen": -37.96227264404297, |
| "ref_logps/rejected": -44.814449310302734, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.9289212226867676, |
| "rewards/margins": 0.5614204406738281, |
| "rewards/rejected": -1.4903416633605957, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 8.004493640627455, |
| "learning_rate": 2.106741573033708e-08, |
| "logps/chosen": -44.516780853271484, |
| "logps/rejected": -51.935089111328125, |
| "loss": 0.6119, |
| "losses/dpo": 0.556452751159668, |
| "losses/sft": 1.4079639911651611, |
| "losses/total": 0.556452751159668, |
| "ref_logps/chosen": -35.908897399902344, |
| "ref_logps/rejected": -39.84672546386719, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.8607881665229797, |
| "rewards/margins": 0.34804895520210266, |
| "rewards/rejected": -1.2088370323181152, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 7.393197706656567, |
| "learning_rate": 1.9662921348314606e-08, |
| "logps/chosen": -44.82762908935547, |
| "logps/rejected": -59.140480041503906, |
| "loss": 0.509, |
| "losses/dpo": 0.420447438955307, |
| "losses/sft": 1.7410156726837158, |
| "losses/total": 0.420447438955307, |
| "ref_logps/chosen": -36.08792495727539, |
| "ref_logps/rejected": -43.82966613769531, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.8739705085754395, |
| "rewards/margins": 0.6571108102798462, |
| "rewards/rejected": -1.5310813188552856, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.89, |
| "grad_norm": 7.992863219139361, |
| "learning_rate": 1.8258426966292133e-08, |
| "logps/chosen": -45.79706573486328, |
| "logps/rejected": -53.040687561035156, |
| "loss": 0.5962, |
| "losses/dpo": 0.6750953197479248, |
| "losses/sft": 1.7228975296020508, |
| "losses/total": 0.6750953197479248, |
| "ref_logps/chosen": -37.67970657348633, |
| "ref_logps/rejected": -40.953521728515625, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.8117363452911377, |
| "rewards/margins": 0.3969798684120178, |
| "rewards/rejected": -1.2087161540985107, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 7.487738517511007, |
| "learning_rate": 1.685393258426966e-08, |
| "logps/chosen": -45.35090637207031, |
| "logps/rejected": -57.14335632324219, |
| "loss": 0.5354, |
| "losses/dpo": 0.5379496812820435, |
| "losses/sft": 1.6705958843231201, |
| "losses/total": 0.5379496812820435, |
| "ref_logps/chosen": -37.31737518310547, |
| "ref_logps/rejected": -43.805870056152344, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -0.8033530712127686, |
| "rewards/margins": 0.5303957462310791, |
| "rewards/rejected": -1.3337488174438477, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.91, |
| "grad_norm": 8.22102430010328, |
| "learning_rate": 1.544943820224719e-08, |
| "logps/chosen": -47.1776237487793, |
| "logps/rejected": -54.27086639404297, |
| "loss": 0.5733, |
| "losses/dpo": 0.5613248348236084, |
| "losses/sft": 1.773917317390442, |
| "losses/total": 0.5613248348236084, |
| "ref_logps/chosen": -38.05524444580078, |
| "ref_logps/rejected": -40.46519470214844, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.9122380614280701, |
| "rewards/margins": 0.4683291018009186, |
| "rewards/rejected": -1.3805670738220215, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.91, |
| "grad_norm": 7.908569868218082, |
| "learning_rate": 1.4044943820224718e-08, |
| "logps/chosen": -44.001075744628906, |
| "logps/rejected": -60.508758544921875, |
| "loss": 0.5285, |
| "losses/dpo": 0.5084520578384399, |
| "losses/sft": 1.5907535552978516, |
| "losses/total": 0.5084520578384399, |
| "ref_logps/chosen": -34.90777587890625, |
| "ref_logps/rejected": -45.287864685058594, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.9093303084373474, |
| "rewards/margins": 0.6127593517303467, |
| "rewards/rejected": -1.5220897197723389, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 7.329445132361356, |
| "learning_rate": 1.2640449438202247e-08, |
| "logps/chosen": -46.82018280029297, |
| "logps/rejected": -53.613643646240234, |
| "loss": 0.521, |
| "losses/dpo": 0.5018836259841919, |
| "losses/sft": 1.6243071556091309, |
| "losses/total": 0.5018836259841919, |
| "ref_logps/chosen": -38.469993591308594, |
| "ref_logps/rejected": -39.96007537841797, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.8350194692611694, |
| "rewards/margins": 0.5303376913070679, |
| "rewards/rejected": -1.3653571605682373, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.93, |
| "grad_norm": 7.4822766342943225, |
| "learning_rate": 1.1235955056179774e-08, |
| "logps/chosen": -46.926666259765625, |
| "logps/rejected": -55.5013427734375, |
| "loss": 0.5439, |
| "losses/dpo": 0.575495183467865, |
| "losses/sft": 1.3514134883880615, |
| "losses/total": 0.575495183467865, |
| "ref_logps/chosen": -38.2935905456543, |
| "ref_logps/rejected": -42.219791412353516, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.8633076548576355, |
| "rewards/margins": 0.46484747529029846, |
| "rewards/rejected": -1.3281550407409668, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.94, |
| "grad_norm": 7.481721913520452, |
| "learning_rate": 9.831460674157303e-09, |
| "logps/chosen": -46.69519805908203, |
| "logps/rejected": -55.18059158325195, |
| "loss": 0.5312, |
| "losses/dpo": 0.5392994284629822, |
| "losses/sft": 2.022167682647705, |
| "losses/total": 0.5392994284629822, |
| "ref_logps/chosen": -38.2025146484375, |
| "ref_logps/rejected": -41.62324905395508, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -0.8492681384086609, |
| "rewards/margins": 0.506466269493103, |
| "rewards/rejected": -1.3557343482971191, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.94, |
| "grad_norm": 8.860083156452712, |
| "learning_rate": 8.42696629213483e-09, |
| "logps/chosen": -47.0518798828125, |
| "logps/rejected": -56.05253601074219, |
| "loss": 0.6151, |
| "losses/dpo": 0.8160465955734253, |
| "losses/sft": 1.661864161491394, |
| "losses/total": 0.8160465955734253, |
| "ref_logps/chosen": -37.67930603027344, |
| "ref_logps/rejected": -42.802921295166016, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.9372565746307373, |
| "rewards/margins": 0.3877047896385193, |
| "rewards/rejected": -1.3249614238739014, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.95, |
| "grad_norm": 7.697893962559924, |
| "learning_rate": 7.022471910112359e-09, |
| "logps/chosen": -46.420570373535156, |
| "logps/rejected": -56.345977783203125, |
| "loss": 0.5154, |
| "losses/dpo": 0.5586492419242859, |
| "losses/sft": 1.621840476989746, |
| "losses/total": 0.5586492419242859, |
| "ref_logps/chosen": -38.219852447509766, |
| "ref_logps/rejected": -42.38871765136719, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -0.8200712203979492, |
| "rewards/margins": 0.5756551027297974, |
| "rewards/rejected": -1.395726203918457, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 7.398815606595402, |
| "learning_rate": 5.617977528089887e-09, |
| "logps/chosen": -45.353294372558594, |
| "logps/rejected": -56.47963333129883, |
| "loss": 0.5242, |
| "losses/dpo": 0.5106035470962524, |
| "losses/sft": 1.4234966039657593, |
| "losses/total": 0.5106035470962524, |
| "ref_logps/chosen": -36.788330078125, |
| "ref_logps/rejected": -42.60810852050781, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.8564971089363098, |
| "rewards/margins": 0.5306553244590759, |
| "rewards/rejected": -1.3871524333953857, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.97, |
| "grad_norm": 7.0449225391612895, |
| "learning_rate": 4.213483146067415e-09, |
| "logps/chosen": -44.40395736694336, |
| "logps/rejected": -53.697776794433594, |
| "loss": 0.5379, |
| "losses/dpo": 0.5200778841972351, |
| "losses/sft": 1.9024913311004639, |
| "losses/total": 0.5200778841972351, |
| "ref_logps/chosen": -36.2678337097168, |
| "ref_logps/rejected": -40.272247314453125, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.8136123418807983, |
| "rewards/margins": 0.5289404392242432, |
| "rewards/rejected": -1.3425527811050415, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.97, |
| "grad_norm": 7.8920504953670525, |
| "learning_rate": 2.8089887640449435e-09, |
| "logps/chosen": -45.78767776489258, |
| "logps/rejected": -58.19701385498047, |
| "loss": 0.5882, |
| "losses/dpo": 0.5196930170059204, |
| "losses/sft": 1.4936178922653198, |
| "losses/total": 0.5196930170059204, |
| "ref_logps/chosen": -36.467750549316406, |
| "ref_logps/rejected": -44.57653045654297, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.9319925904273987, |
| "rewards/margins": 0.43005576729774475, |
| "rewards/rejected": -1.3620483875274658, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.98, |
| "grad_norm": 6.749020955821219, |
| "learning_rate": 1.4044943820224717e-09, |
| "logps/chosen": -43.89699935913086, |
| "logps/rejected": -53.18260955810547, |
| "loss": 0.5308, |
| "losses/dpo": 0.7003037333488464, |
| "losses/sft": 1.696626901626587, |
| "losses/total": 0.7003037333488464, |
| "ref_logps/chosen": -35.854217529296875, |
| "ref_logps/rejected": -39.928733825683594, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.8042781949043274, |
| "rewards/margins": 0.5211097002029419, |
| "rewards/rejected": -1.3253878355026245, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.99, |
| "grad_norm": 8.026771636400738, |
| "learning_rate": 0.0, |
| "logps/chosen": -48.76679229736328, |
| "logps/rejected": -59.84498596191406, |
| "loss": 0.5292, |
| "losses/dpo": 0.46332383155822754, |
| "losses/sft": 1.643686056137085, |
| "losses/total": 0.46332383155822754, |
| "ref_logps/chosen": -39.41192626953125, |
| "ref_logps/rejected": -44.815162658691406, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.935486912727356, |
| "rewards/margins": 0.567494809627533, |
| "rewards/rejected": -1.5029817819595337, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.99, |
| "step": 396, |
| "total_flos": 0.0, |
| "train_loss": 0.6025580500412469, |
| "train_runtime": 11600.2001, |
| "train_samples_per_second": 4.386, |
| "train_steps_per_second": 0.034 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 396, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 70, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|