| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9962453066332917, |
| "eval_steps": 500, |
| "global_step": 1197, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.0416666666666667e-07, |
| "logps/chosen": -71.84510803222656, |
| "logps/rejected": -68.73027038574219, |
| "loss": 0.7215, |
| "losses/dpo": 0.7109084129333496, |
| "losses/sft": 1.546567678451538, |
| "losses/total": 0.7109084129333496, |
| "ref_logps/chosen": -71.75371551513672, |
| "ref_logps/rejected": -68.71001434326172, |
| "rewards/accuracies": 0.5049999952316284, |
| "rewards/chosen": -0.009138082154095173, |
| "rewards/margins": -0.0071117933839559555, |
| "rewards/rejected": -0.0020262906327843666, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 2.0833333333333333e-07, |
| "logps/chosen": -69.88240814208984, |
| "logps/rejected": -69.54386901855469, |
| "loss": 0.7126, |
| "losses/dpo": 0.6903221607208252, |
| "losses/sft": 1.5194830894470215, |
| "losses/total": 0.6903221607208252, |
| "ref_logps/chosen": -69.94444274902344, |
| "ref_logps/rejected": -69.52371978759766, |
| "rewards/accuracies": 0.4970000088214874, |
| "rewards/chosen": 0.006204119883477688, |
| "rewards/margins": 0.008218951523303986, |
| "rewards/rejected": -0.002014830242842436, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 3.1249999999999997e-07, |
| "logps/chosen": -73.9260482788086, |
| "logps/rejected": -71.71464538574219, |
| "loss": 0.7231, |
| "losses/dpo": 0.7194635272026062, |
| "losses/sft": 1.5661953687667847, |
| "losses/total": 0.7194635272026062, |
| "ref_logps/chosen": -73.91621398925781, |
| "ref_logps/rejected": -71.82748413085938, |
| "rewards/accuracies": 0.5029999613761902, |
| "rewards/chosen": -0.0009833112126216292, |
| "rewards/margins": -0.01226747315376997, |
| "rewards/rejected": 0.011284159496426582, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.1666666666666667e-07, |
| "logps/chosen": -68.9831314086914, |
| "logps/rejected": -66.86358642578125, |
| "loss": 0.7192, |
| "losses/dpo": 0.7422583103179932, |
| "losses/sft": 1.5391790866851807, |
| "losses/total": 0.7422583103179932, |
| "ref_logps/chosen": -68.984130859375, |
| "ref_logps/rejected": -66.90238189697266, |
| "rewards/accuracies": 0.4930000305175781, |
| "rewards/chosen": 0.00010019920591730624, |
| "rewards/margins": -0.00377923552878201, |
| "rewards/rejected": 0.003879436058923602, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.976787372330547e-07, |
| "logps/chosen": -73.55087280273438, |
| "logps/rejected": -71.61868286132812, |
| "loss": 0.7077, |
| "losses/dpo": 0.7084662914276123, |
| "losses/sft": 1.5277128219604492, |
| "losses/total": 0.7084662914276123, |
| "ref_logps/chosen": -73.66172790527344, |
| "ref_logps/rejected": -71.5505599975586, |
| "rewards/accuracies": 0.5219999551773071, |
| "rewards/chosen": 0.011085684411227703, |
| "rewards/margins": 0.017898183315992355, |
| "rewards/rejected": -0.00681249750778079, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.860724233983287e-07, |
| "logps/chosen": -69.85209655761719, |
| "logps/rejected": -69.75456237792969, |
| "loss": 0.7016, |
| "losses/dpo": 0.6934231519699097, |
| "losses/sft": 1.5373976230621338, |
| "losses/total": 0.6934231519699097, |
| "ref_logps/chosen": -70.19540405273438, |
| "ref_logps/rejected": -69.83726501464844, |
| "rewards/accuracies": 0.5325000286102295, |
| "rewards/chosen": 0.0343310683965683, |
| "rewards/margins": 0.026060676202178, |
| "rewards/rejected": 0.008270387537777424, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.7446610956360255e-07, |
| "logps/chosen": -69.9237289428711, |
| "logps/rejected": -69.56204223632812, |
| "loss": 0.6826, |
| "losses/dpo": 0.6780227422714233, |
| "losses/sft": 1.5951652526855469, |
| "losses/total": 0.6780227422714233, |
| "ref_logps/chosen": -70.6123046875, |
| "ref_logps/rejected": -69.56880187988281, |
| "rewards/accuracies": 0.5539999604225159, |
| "rewards/chosen": 0.06885794550180435, |
| "rewards/margins": 0.0681825652718544, |
| "rewards/rejected": 0.0006753735360689461, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.628597957288765e-07, |
| "logps/chosen": -71.02560424804688, |
| "logps/rejected": -69.64324951171875, |
| "loss": 0.679, |
| "losses/dpo": 0.6822719573974609, |
| "losses/sft": 1.5291378498077393, |
| "losses/total": 0.6822719573974609, |
| "ref_logps/chosen": -71.80342102050781, |
| "ref_logps/rejected": -69.58236694335938, |
| "rewards/accuracies": 0.5559999942779541, |
| "rewards/chosen": 0.07778114080429077, |
| "rewards/margins": 0.08386911451816559, |
| "rewards/rejected": -0.006087968125939369, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.512534818941504e-07, |
| "logps/chosen": -67.88444519042969, |
| "logps/rejected": -68.10497283935547, |
| "loss": 0.679, |
| "losses/dpo": 0.6802477240562439, |
| "losses/sft": 1.5126712322235107, |
| "losses/total": 0.6802477240562439, |
| "ref_logps/chosen": -68.86332702636719, |
| "ref_logps/rejected": -68.26943969726562, |
| "rewards/accuracies": 0.5540000200271606, |
| "rewards/chosen": 0.0978882685303688, |
| "rewards/margins": 0.08144120872020721, |
| "rewards/rejected": 0.01644706539809704, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.3964716805942433e-07, |
| "logps/chosen": -69.61144256591797, |
| "logps/rejected": -71.70446014404297, |
| "loss": 0.661, |
| "losses/dpo": 0.6608595848083496, |
| "losses/sft": 1.5144795179367065, |
| "losses/total": 0.6608595848083496, |
| "ref_logps/chosen": -70.85598754882812, |
| "ref_logps/rejected": -71.67627716064453, |
| "rewards/accuracies": 0.5940000414848328, |
| "rewards/chosen": 0.12445437163114548, |
| "rewards/margins": 0.12727266550064087, |
| "rewards/rejected": -0.0028182892128825188, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.280408542246982e-07, |
| "logps/chosen": -69.20745849609375, |
| "logps/rejected": -68.46233367919922, |
| "loss": 0.6577, |
| "losses/dpo": 0.669312059879303, |
| "losses/sft": 1.5866602659225464, |
| "losses/total": 0.669312059879303, |
| "ref_logps/chosen": -70.55343627929688, |
| "ref_logps/rejected": -68.40556335449219, |
| "rewards/accuracies": 0.5860000252723694, |
| "rewards/chosen": 0.13459768891334534, |
| "rewards/margins": 0.14027482271194458, |
| "rewards/rejected": -0.005677163600921631, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.164345403899721e-07, |
| "logps/chosen": -70.11485290527344, |
| "logps/rejected": -68.45763397216797, |
| "loss": 0.649, |
| "losses/dpo": 0.6586881875991821, |
| "losses/sft": 1.5220152139663696, |
| "losses/total": 0.6586881875991821, |
| "ref_logps/chosen": -71.6851806640625, |
| "ref_logps/rejected": -68.39569091796875, |
| "rewards/accuracies": 0.5975000262260437, |
| "rewards/chosen": 0.15703237056732178, |
| "rewards/margins": 0.16322720050811768, |
| "rewards/rejected": -0.006194834131747484, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 4.04828226555246e-07, |
| "logps/chosen": -69.53617095947266, |
| "logps/rejected": -68.70401763916016, |
| "loss": 0.6401, |
| "losses/dpo": 0.6386440396308899, |
| "losses/sft": 1.4834158420562744, |
| "losses/total": 0.6386440396308899, |
| "ref_logps/chosen": -71.28308868408203, |
| "ref_logps/rejected": -68.5333480834961, |
| "rewards/accuracies": 0.6014999151229858, |
| "rewards/chosen": 0.17469124495983124, |
| "rewards/margins": 0.19175761938095093, |
| "rewards/rejected": -0.017066391184926033, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.9322191272051997e-07, |
| "logps/chosen": -69.60104370117188, |
| "logps/rejected": -70.08628845214844, |
| "loss": 0.6349, |
| "losses/dpo": 0.649332582950592, |
| "losses/sft": 1.4883217811584473, |
| "losses/total": 0.649332582950592, |
| "ref_logps/chosen": -71.48506164550781, |
| "ref_logps/rejected": -69.83267211914062, |
| "rewards/accuracies": 0.6045000553131104, |
| "rewards/chosen": 0.18840213119983673, |
| "rewards/margins": 0.2137639820575714, |
| "rewards/rejected": -0.025361843407154083, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.816155988857939e-07, |
| "logps/chosen": -68.25212097167969, |
| "logps/rejected": -69.1668930053711, |
| "loss": 0.6273, |
| "losses/dpo": 0.6162423491477966, |
| "losses/sft": 1.5002387762069702, |
| "losses/total": 0.6162423491477966, |
| "ref_logps/chosen": -70.23272705078125, |
| "ref_logps/rejected": -68.74607849121094, |
| "rewards/accuracies": 0.6295000314712524, |
| "rewards/chosen": 0.19805949926376343, |
| "rewards/margins": 0.24014097452163696, |
| "rewards/rejected": -0.04208146035671234, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.700092850510678e-07, |
| "logps/chosen": -70.95040893554688, |
| "logps/rejected": -72.96726989746094, |
| "loss": 0.6195, |
| "losses/dpo": 0.6564822196960449, |
| "losses/sft": 1.560630440711975, |
| "losses/total": 0.6564822196960449, |
| "ref_logps/chosen": -72.91325378417969, |
| "ref_logps/rejected": -72.14061737060547, |
| "rewards/accuracies": 0.6350000500679016, |
| "rewards/chosen": 0.19628457725048065, |
| "rewards/margins": 0.2789497375488281, |
| "rewards/rejected": -0.08266513794660568, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.5840297121634165e-07, |
| "logps/chosen": -67.00164794921875, |
| "logps/rejected": -69.44139099121094, |
| "loss": 0.6167, |
| "losses/dpo": 0.6204876899719238, |
| "losses/sft": 1.4992446899414062, |
| "losses/total": 0.6204876899719238, |
| "ref_logps/chosen": -68.97762298583984, |
| "ref_logps/rejected": -68.63683319091797, |
| "rewards/accuracies": 0.6190000772476196, |
| "rewards/chosen": 0.19759786128997803, |
| "rewards/margins": 0.27805399894714355, |
| "rewards/rejected": -0.08045615255832672, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.4679665738161556e-07, |
| "logps/chosen": -69.90442657470703, |
| "logps/rejected": -72.69268035888672, |
| "loss": 0.6054, |
| "losses/dpo": 0.598928689956665, |
| "losses/sft": 1.5615432262420654, |
| "losses/total": 0.598928689956665, |
| "ref_logps/chosen": -71.99002838134766, |
| "ref_logps/rejected": -71.58721923828125, |
| "rewards/accuracies": 0.6365000009536743, |
| "rewards/chosen": 0.20855939388275146, |
| "rewards/margins": 0.3191070556640625, |
| "rewards/rejected": -0.11054765433073044, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.351903435468895e-07, |
| "logps/chosen": -68.88660430908203, |
| "logps/rejected": -70.41629791259766, |
| "loss": 0.5879, |
| "losses/dpo": 0.5859370231628418, |
| "losses/sft": 1.5487860441207886, |
| "losses/total": 0.5859370231628418, |
| "ref_logps/chosen": -71.17853546142578, |
| "ref_logps/rejected": -69.02958679199219, |
| "rewards/accuracies": 0.6640000343322754, |
| "rewards/chosen": 0.22919251024723053, |
| "rewards/margins": 0.36786285042762756, |
| "rewards/rejected": -0.13867038488388062, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 3.2358402971216343e-07, |
| "logps/chosen": -69.55220031738281, |
| "logps/rejected": -70.18158721923828, |
| "loss": 0.6038, |
| "losses/dpo": 0.6201021075248718, |
| "losses/sft": 1.5225414037704468, |
| "losses/total": 0.6201021075248718, |
| "ref_logps/chosen": -71.74359130859375, |
| "ref_logps/rejected": -68.91885375976562, |
| "rewards/accuracies": 0.6455000042915344, |
| "rewards/chosen": 0.2191377878189087, |
| "rewards/margins": 0.34541237354278564, |
| "rewards/rejected": -0.12627457082271576, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 3.1197771587743734e-07, |
| "logps/chosen": -69.61231231689453, |
| "logps/rejected": -72.09597778320312, |
| "loss": 0.5827, |
| "losses/dpo": 0.5971881747245789, |
| "losses/sft": 1.4868888854980469, |
| "losses/total": 0.5971881747245789, |
| "ref_logps/chosen": -71.90864562988281, |
| "ref_logps/rejected": -70.28910827636719, |
| "rewards/accuracies": 0.6584999561309814, |
| "rewards/chosen": 0.22963352501392365, |
| "rewards/margins": 0.41032031178474426, |
| "rewards/rejected": -0.18068677186965942, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 3.003714020427112e-07, |
| "logps/chosen": -69.58737182617188, |
| "logps/rejected": -70.84317016601562, |
| "loss": 0.5868, |
| "losses/dpo": 0.5683429837226868, |
| "losses/sft": 1.5186117887496948, |
| "losses/total": 0.5683429837226868, |
| "ref_logps/chosen": -71.8995590209961, |
| "ref_logps/rejected": -68.87027740478516, |
| "rewards/accuracies": 0.6439999938011169, |
| "rewards/chosen": 0.23121845722198486, |
| "rewards/margins": 0.42850860953330994, |
| "rewards/rejected": -0.19729015231132507, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.887650882079851e-07, |
| "logps/chosen": -69.75614166259766, |
| "logps/rejected": -71.54226684570312, |
| "loss": 0.5853, |
| "losses/dpo": 0.573384165763855, |
| "losses/sft": 1.506340742111206, |
| "losses/total": 0.573384165763855, |
| "ref_logps/chosen": -71.97299194335938, |
| "ref_logps/rejected": -69.5394287109375, |
| "rewards/accuracies": 0.6439999938011169, |
| "rewards/chosen": 0.2216847836971283, |
| "rewards/margins": 0.4219689667224884, |
| "rewards/rejected": -0.2002841979265213, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.77158774373259e-07, |
| "logps/chosen": -67.17892456054688, |
| "logps/rejected": -72.1897201538086, |
| "loss": 0.5811, |
| "losses/dpo": 0.5949756503105164, |
| "losses/sft": 1.5656284093856812, |
| "losses/total": 0.5949756503105164, |
| "ref_logps/chosen": -69.0622329711914, |
| "ref_logps/rejected": -69.59004974365234, |
| "rewards/accuracies": 0.658500075340271, |
| "rewards/chosen": 0.18833142518997192, |
| "rewards/margins": 0.4482985734939575, |
| "rewards/rejected": -0.2599670886993408, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.65552460538533e-07, |
| "logps/chosen": -67.06172180175781, |
| "logps/rejected": -71.94342041015625, |
| "loss": 0.5723, |
| "losses/dpo": 0.55665123462677, |
| "losses/sft": 1.5321460962295532, |
| "losses/total": 0.55665123462677, |
| "ref_logps/chosen": -68.80352783203125, |
| "ref_logps/rejected": -69.0389175415039, |
| "rewards/accuracies": 0.6610000133514404, |
| "rewards/chosen": 0.17417989671230316, |
| "rewards/margins": 0.46463003754615784, |
| "rewards/rejected": -0.29045018553733826, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2.539461467038069e-07, |
| "logps/chosen": -71.48672485351562, |
| "logps/rejected": -74.49577331542969, |
| "loss": 0.5551, |
| "losses/dpo": 0.5403231382369995, |
| "losses/sft": 1.5089354515075684, |
| "losses/total": 0.5403231382369995, |
| "ref_logps/chosen": -73.56059265136719, |
| "ref_logps/rejected": -71.07339477539062, |
| "rewards/accuracies": 0.6800000071525574, |
| "rewards/chosen": 0.20738649368286133, |
| "rewards/margins": 0.5496238470077515, |
| "rewards/rejected": -0.34223735332489014, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 2.4233983286908075e-07, |
| "logps/chosen": -70.11666107177734, |
| "logps/rejected": -72.92801666259766, |
| "loss": 0.5697, |
| "losses/dpo": 0.5605096817016602, |
| "losses/sft": 1.5102070569992065, |
| "losses/total": 0.5605096817016602, |
| "ref_logps/chosen": -71.93968200683594, |
| "ref_logps/rejected": -69.70313262939453, |
| "rewards/accuracies": 0.6594999432563782, |
| "rewards/chosen": 0.18230296671390533, |
| "rewards/margins": 0.5047909021377563, |
| "rewards/rejected": -0.32248786091804504, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.3073351903435469e-07, |
| "logps/chosen": -70.7259750366211, |
| "logps/rejected": -74.96146392822266, |
| "loss": 0.5596, |
| "losses/dpo": 0.5713181495666504, |
| "losses/sft": 1.501438021659851, |
| "losses/total": 0.5713181495666504, |
| "ref_logps/chosen": -72.50656127929688, |
| "ref_logps/rejected": -71.40023803710938, |
| "rewards/accuracies": 0.6769999861717224, |
| "rewards/chosen": 0.1780581921339035, |
| "rewards/margins": 0.5341811180114746, |
| "rewards/rejected": -0.3561229705810547, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 2.191272051996286e-07, |
| "logps/chosen": -71.7147445678711, |
| "logps/rejected": -74.73912048339844, |
| "loss": 0.5536, |
| "losses/dpo": 0.5598438382148743, |
| "losses/sft": 1.6115312576293945, |
| "losses/total": 0.5598438382148743, |
| "ref_logps/chosen": -73.46680450439453, |
| "ref_logps/rejected": -70.61848449707031, |
| "rewards/accuracies": 0.6815000772476196, |
| "rewards/chosen": 0.17520827054977417, |
| "rewards/margins": 0.5872728228569031, |
| "rewards/rejected": -0.4120645225048065, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 2.075208913649025e-07, |
| "logps/chosen": -70.7854232788086, |
| "logps/rejected": -74.75676727294922, |
| "loss": 0.55, |
| "losses/dpo": 0.5355216860771179, |
| "losses/sft": 1.5508781671524048, |
| "losses/total": 0.5355216860771179, |
| "ref_logps/chosen": -72.55353546142578, |
| "ref_logps/rejected": -70.52770233154297, |
| "rewards/accuracies": 0.6890000104904175, |
| "rewards/chosen": 0.1768111288547516, |
| "rewards/margins": 0.5997176170349121, |
| "rewards/rejected": -0.4229064881801605, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.959145775301764e-07, |
| "logps/chosen": -69.92965698242188, |
| "logps/rejected": -75.43944549560547, |
| "loss": 0.5567, |
| "losses/dpo": 0.5669773817062378, |
| "losses/sft": 1.5475414991378784, |
| "losses/total": 0.5669773817062378, |
| "ref_logps/chosen": -71.25016021728516, |
| "ref_logps/rejected": -70.77532958984375, |
| "rewards/accuracies": 0.6794999837875366, |
| "rewards/chosen": 0.13204967975616455, |
| "rewards/margins": 0.5984623432159424, |
| "rewards/rejected": -0.4664126932621002, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.8430826369545033e-07, |
| "logps/chosen": -70.1933364868164, |
| "logps/rejected": -74.6698226928711, |
| "loss": 0.5486, |
| "losses/dpo": 0.543175458908081, |
| "losses/sft": 1.5320526361465454, |
| "losses/total": 0.543175458908081, |
| "ref_logps/chosen": -71.89373016357422, |
| "ref_logps/rejected": -70.11508178710938, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.17003829777240753, |
| "rewards/margins": 0.6255122423171997, |
| "rewards/rejected": -0.455473929643631, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.7270194986072424e-07, |
| "logps/chosen": -68.841064453125, |
| "logps/rejected": -75.60282897949219, |
| "loss": 0.5496, |
| "losses/dpo": 0.5592978596687317, |
| "losses/sft": 1.591374158859253, |
| "losses/total": 0.5592978596687317, |
| "ref_logps/chosen": -70.10298156738281, |
| "ref_logps/rejected": -70.73596954345703, |
| "rewards/accuracies": 0.684499979019165, |
| "rewards/chosen": 0.12619122862815857, |
| "rewards/margins": 0.6128779053688049, |
| "rewards/rejected": -0.48668670654296875, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.6109563602599812e-07, |
| "logps/chosen": -70.97602081298828, |
| "logps/rejected": -75.59082794189453, |
| "loss": 0.5226, |
| "losses/dpo": 0.5137518048286438, |
| "losses/sft": 1.4946039915084839, |
| "losses/total": 0.5137518048286438, |
| "ref_logps/chosen": -72.67564392089844, |
| "ref_logps/rejected": -70.24249267578125, |
| "rewards/accuracies": 0.7165000438690186, |
| "rewards/chosen": 0.169962078332901, |
| "rewards/margins": 0.704794704914093, |
| "rewards/rejected": -0.5348325967788696, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.4948932219127206e-07, |
| "logps/chosen": -69.44739532470703, |
| "logps/rejected": -74.73712158203125, |
| "loss": 0.5483, |
| "losses/dpo": 0.554201602935791, |
| "losses/sft": 1.5423518419265747, |
| "losses/total": 0.554201602935791, |
| "ref_logps/chosen": -70.63737487792969, |
| "ref_logps/rejected": -69.3394546508789, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.11899794638156891, |
| "rewards/margins": 0.6587647795677185, |
| "rewards/rejected": -0.5397669076919556, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.3788300835654597e-07, |
| "logps/chosen": -67.69676971435547, |
| "logps/rejected": -72.41621398925781, |
| "loss": 0.554, |
| "losses/dpo": 0.536446750164032, |
| "losses/sft": 1.52887761592865, |
| "losses/total": 0.536446750164032, |
| "ref_logps/chosen": -68.82073974609375, |
| "ref_logps/rejected": -67.15235137939453, |
| "rewards/accuracies": 0.6720000505447388, |
| "rewards/chosen": 0.11239679157733917, |
| "rewards/margins": 0.6387830972671509, |
| "rewards/rejected": -0.5263863205909729, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.2627669452181985e-07, |
| "logps/chosen": -68.61820983886719, |
| "logps/rejected": -75.03334045410156, |
| "loss": 0.547, |
| "losses/dpo": 0.5469151139259338, |
| "losses/sft": 1.524001121520996, |
| "losses/total": 0.5469151139259338, |
| "ref_logps/chosen": -69.68536376953125, |
| "ref_logps/rejected": -69.33779907226562, |
| "rewards/accuracies": 0.6725000143051147, |
| "rewards/chosen": 0.10671478509902954, |
| "rewards/margins": 0.6762691736221313, |
| "rewards/rejected": -0.5695543885231018, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1.1467038068709377e-07, |
| "logps/chosen": -71.48025512695312, |
| "logps/rejected": -76.9962387084961, |
| "loss": 0.5328, |
| "losses/dpo": 0.5382718443870544, |
| "losses/sft": 1.5356690883636475, |
| "losses/total": 0.5382718443870544, |
| "ref_logps/chosen": -72.66265869140625, |
| "ref_logps/rejected": -70.87562561035156, |
| "rewards/accuracies": 0.7055000066757202, |
| "rewards/chosen": 0.11823976784944534, |
| "rewards/margins": 0.7303012013435364, |
| "rewards/rejected": -0.6120614409446716, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.0306406685236768e-07, |
| "logps/chosen": -68.71895599365234, |
| "logps/rejected": -74.29911804199219, |
| "loss": 0.5524, |
| "losses/dpo": 0.5623547434806824, |
| "losses/sft": 1.6026860475540161, |
| "losses/total": 0.5623547434806824, |
| "ref_logps/chosen": -69.32998657226562, |
| "ref_logps/rejected": -68.43716430664062, |
| "rewards/accuracies": 0.6655000448226929, |
| "rewards/chosen": 0.06110435351729393, |
| "rewards/margins": 0.6473007202148438, |
| "rewards/rejected": -0.5861963629722595, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 9.14577530176416e-08, |
| "logps/chosen": -69.40322875976562, |
| "logps/rejected": -73.88810729980469, |
| "loss": 0.5513, |
| "losses/dpo": 0.5662988424301147, |
| "losses/sft": 1.6082065105438232, |
| "losses/total": 0.5662988424301147, |
| "ref_logps/chosen": -70.1698226928711, |
| "ref_logps/rejected": -68.06956481933594, |
| "rewards/accuracies": 0.6759999990463257, |
| "rewards/chosen": 0.07665982842445374, |
| "rewards/margins": 0.6585137248039246, |
| "rewards/rejected": -0.5818539261817932, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 7.98514391829155e-08, |
| "logps/chosen": -68.32543182373047, |
| "logps/rejected": -74.76167297363281, |
| "loss": 0.5433, |
| "losses/dpo": 0.5388572216033936, |
| "losses/sft": 1.5300703048706055, |
| "losses/total": 0.5388572216033936, |
| "ref_logps/chosen": -69.11152648925781, |
| "ref_logps/rejected": -68.69097900390625, |
| "rewards/accuracies": 0.6825000047683716, |
| "rewards/chosen": 0.07860930263996124, |
| "rewards/margins": 0.685679018497467, |
| "rewards/rejected": -0.6070696115493774, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.824512534818941e-08, |
| "logps/chosen": -70.73451232910156, |
| "logps/rejected": -77.00275421142578, |
| "loss": 0.5239, |
| "losses/dpo": 0.5082178115844727, |
| "losses/sft": 1.4840093851089478, |
| "losses/total": 0.5082178115844727, |
| "ref_logps/chosen": -72.08134460449219, |
| "ref_logps/rejected": -70.49996948242188, |
| "rewards/accuracies": 0.7019999623298645, |
| "rewards/chosen": 0.13468389213085175, |
| "rewards/margins": 0.7849621772766113, |
| "rewards/rejected": -0.6502782702445984, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.6638811513463324e-08, |
| "logps/chosen": -69.95764923095703, |
| "logps/rejected": -77.90116882324219, |
| "loss": 0.5327, |
| "losses/dpo": 0.5328630805015564, |
| "losses/sft": 1.6418886184692383, |
| "losses/total": 0.5328630805015564, |
| "ref_logps/chosen": -70.50801086425781, |
| "ref_logps/rejected": -71.11058807373047, |
| "rewards/accuracies": 0.6959999799728394, |
| "rewards/chosen": 0.05503645911812782, |
| "rewards/margins": 0.7340949773788452, |
| "rewards/rejected": -0.6790586113929749, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.503249767873723e-08, |
| "logps/chosen": -69.83995819091797, |
| "logps/rejected": -75.7170639038086, |
| "loss": 0.5415, |
| "losses/dpo": 0.5642114281654358, |
| "losses/sft": 1.5595824718475342, |
| "losses/total": 0.5642114281654358, |
| "ref_logps/chosen": -70.96809387207031, |
| "ref_logps/rejected": -69.68138885498047, |
| "rewards/accuracies": 0.6990000009536743, |
| "rewards/chosen": 0.11281368136405945, |
| "rewards/margins": 0.7163800001144409, |
| "rewards/rejected": -0.6035662889480591, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 3.3426183844011144e-08, |
| "logps/chosen": -72.0064697265625, |
| "logps/rejected": -75.64459228515625, |
| "loss": 0.553, |
| "losses/dpo": 0.6136656403541565, |
| "losses/sft": 1.6066731214523315, |
| "losses/total": 0.6136656403541565, |
| "ref_logps/chosen": -72.82428741455078, |
| "ref_logps/rejected": -69.51007843017578, |
| "rewards/accuracies": 0.6880000233650208, |
| "rewards/chosen": 0.08178197592496872, |
| "rewards/margins": 0.6952335834503174, |
| "rewards/rejected": -0.6134517192840576, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 2.181987000928505e-08, |
| "logps/chosen": -71.19115447998047, |
| "logps/rejected": -74.97571563720703, |
| "loss": 0.5495, |
| "losses/dpo": 0.5555659532546997, |
| "losses/sft": 1.547566533088684, |
| "losses/total": 0.5555659532546997, |
| "ref_logps/chosen": -72.04399108886719, |
| "ref_logps/rejected": -68.92461395263672, |
| "rewards/accuracies": 0.6770000457763672, |
| "rewards/chosen": 0.08528263866901398, |
| "rewards/margins": 0.6903927326202393, |
| "rewards/rejected": -0.6051101088523865, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.0213556174558959e-08, |
| "logps/chosen": -68.98538970947266, |
| "logps/rejected": -74.58392333984375, |
| "loss": 0.5254, |
| "losses/dpo": 0.49954432249069214, |
| "losses/sft": 1.4814612865447998, |
| "losses/total": 0.49954432249069214, |
| "ref_logps/chosen": -70.0972671508789, |
| "ref_logps/rejected": -68.21308898925781, |
| "rewards/accuracies": 0.6914999485015869, |
| "rewards/chosen": 0.11118759214878082, |
| "rewards/margins": 0.7482713460922241, |
| "rewards/rejected": -0.6370838284492493, |
| "step": 1175 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1197, |
| "total_flos": 0.0, |
| "train_loss": 0.5977537606095112, |
| "train_runtime": 15731.8751, |
| "train_samples_per_second": 6.094, |
| "train_steps_per_second": 0.076 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 1197, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": {}, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|