VideoLLaMA-3-7b-combined / trainer_state.json
sameepv21's picture
Upload folder using huggingface_hub
3388daf verified
raw
history blame
29.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9962453066332917,
"eval_steps": 500,
"global_step": 1197,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 1.0416666666666667e-07,
"logps/chosen": -71.84510803222656,
"logps/rejected": -68.73027038574219,
"loss": 0.7215,
"losses/dpo": 0.7109084129333496,
"losses/sft": 1.546567678451538,
"losses/total": 0.7109084129333496,
"ref_logps/chosen": -71.75371551513672,
"ref_logps/rejected": -68.71001434326172,
"rewards/accuracies": 0.5049999952316284,
"rewards/chosen": -0.009138082154095173,
"rewards/margins": -0.0071117933839559555,
"rewards/rejected": -0.0020262906327843666,
"step": 25
},
{
"epoch": 0.13,
"learning_rate": 2.0833333333333333e-07,
"logps/chosen": -69.88240814208984,
"logps/rejected": -69.54386901855469,
"loss": 0.7126,
"losses/dpo": 0.6903221607208252,
"losses/sft": 1.5194830894470215,
"losses/total": 0.6903221607208252,
"ref_logps/chosen": -69.94444274902344,
"ref_logps/rejected": -69.52371978759766,
"rewards/accuracies": 0.4970000088214874,
"rewards/chosen": 0.006204119883477688,
"rewards/margins": 0.008218951523303986,
"rewards/rejected": -0.002014830242842436,
"step": 50
},
{
"epoch": 0.19,
"learning_rate": 3.1249999999999997e-07,
"logps/chosen": -73.9260482788086,
"logps/rejected": -71.71464538574219,
"loss": 0.7231,
"losses/dpo": 0.7194635272026062,
"losses/sft": 1.5661953687667847,
"losses/total": 0.7194635272026062,
"ref_logps/chosen": -73.91621398925781,
"ref_logps/rejected": -71.82748413085938,
"rewards/accuracies": 0.5029999613761902,
"rewards/chosen": -0.0009833112126216292,
"rewards/margins": -0.01226747315376997,
"rewards/rejected": 0.011284159496426582,
"step": 75
},
{
"epoch": 0.25,
"learning_rate": 4.1666666666666667e-07,
"logps/chosen": -68.9831314086914,
"logps/rejected": -66.86358642578125,
"loss": 0.7192,
"losses/dpo": 0.7422583103179932,
"losses/sft": 1.5391790866851807,
"losses/total": 0.7422583103179932,
"ref_logps/chosen": -68.984130859375,
"ref_logps/rejected": -66.90238189697266,
"rewards/accuracies": 0.4930000305175781,
"rewards/chosen": 0.00010019920591730624,
"rewards/margins": -0.00377923552878201,
"rewards/rejected": 0.003879436058923602,
"step": 100
},
{
"epoch": 0.31,
"learning_rate": 4.976787372330547e-07,
"logps/chosen": -73.55087280273438,
"logps/rejected": -71.61868286132812,
"loss": 0.7077,
"losses/dpo": 0.7084662914276123,
"losses/sft": 1.5277128219604492,
"losses/total": 0.7084662914276123,
"ref_logps/chosen": -73.66172790527344,
"ref_logps/rejected": -71.5505599975586,
"rewards/accuracies": 0.5219999551773071,
"rewards/chosen": 0.011085684411227703,
"rewards/margins": 0.017898183315992355,
"rewards/rejected": -0.00681249750778079,
"step": 125
},
{
"epoch": 0.38,
"learning_rate": 4.860724233983287e-07,
"logps/chosen": -69.85209655761719,
"logps/rejected": -69.75456237792969,
"loss": 0.7016,
"losses/dpo": 0.6934231519699097,
"losses/sft": 1.5373976230621338,
"losses/total": 0.6934231519699097,
"ref_logps/chosen": -70.19540405273438,
"ref_logps/rejected": -69.83726501464844,
"rewards/accuracies": 0.5325000286102295,
"rewards/chosen": 0.0343310683965683,
"rewards/margins": 0.026060676202178,
"rewards/rejected": 0.008270387537777424,
"step": 150
},
{
"epoch": 0.44,
"learning_rate": 4.7446610956360255e-07,
"logps/chosen": -69.9237289428711,
"logps/rejected": -69.56204223632812,
"loss": 0.6826,
"losses/dpo": 0.6780227422714233,
"losses/sft": 1.5951652526855469,
"losses/total": 0.6780227422714233,
"ref_logps/chosen": -70.6123046875,
"ref_logps/rejected": -69.56880187988281,
"rewards/accuracies": 0.5539999604225159,
"rewards/chosen": 0.06885794550180435,
"rewards/margins": 0.0681825652718544,
"rewards/rejected": 0.0006753735360689461,
"step": 175
},
{
"epoch": 0.5,
"learning_rate": 4.628597957288765e-07,
"logps/chosen": -71.02560424804688,
"logps/rejected": -69.64324951171875,
"loss": 0.679,
"losses/dpo": 0.6822719573974609,
"losses/sft": 1.5291378498077393,
"losses/total": 0.6822719573974609,
"ref_logps/chosen": -71.80342102050781,
"ref_logps/rejected": -69.58236694335938,
"rewards/accuracies": 0.5559999942779541,
"rewards/chosen": 0.07778114080429077,
"rewards/margins": 0.08386911451816559,
"rewards/rejected": -0.006087968125939369,
"step": 200
},
{
"epoch": 0.56,
"learning_rate": 4.512534818941504e-07,
"logps/chosen": -67.88444519042969,
"logps/rejected": -68.10497283935547,
"loss": 0.679,
"losses/dpo": 0.6802477240562439,
"losses/sft": 1.5126712322235107,
"losses/total": 0.6802477240562439,
"ref_logps/chosen": -68.86332702636719,
"ref_logps/rejected": -68.26943969726562,
"rewards/accuracies": 0.5540000200271606,
"rewards/chosen": 0.0978882685303688,
"rewards/margins": 0.08144120872020721,
"rewards/rejected": 0.01644706539809704,
"step": 225
},
{
"epoch": 0.63,
"learning_rate": 4.3964716805942433e-07,
"logps/chosen": -69.61144256591797,
"logps/rejected": -71.70446014404297,
"loss": 0.661,
"losses/dpo": 0.6608595848083496,
"losses/sft": 1.5144795179367065,
"losses/total": 0.6608595848083496,
"ref_logps/chosen": -70.85598754882812,
"ref_logps/rejected": -71.67627716064453,
"rewards/accuracies": 0.5940000414848328,
"rewards/chosen": 0.12445437163114548,
"rewards/margins": 0.12727266550064087,
"rewards/rejected": -0.0028182892128825188,
"step": 250
},
{
"epoch": 0.69,
"learning_rate": 4.280408542246982e-07,
"logps/chosen": -69.20745849609375,
"logps/rejected": -68.46233367919922,
"loss": 0.6577,
"losses/dpo": 0.669312059879303,
"losses/sft": 1.5866602659225464,
"losses/total": 0.669312059879303,
"ref_logps/chosen": -70.55343627929688,
"ref_logps/rejected": -68.40556335449219,
"rewards/accuracies": 0.5860000252723694,
"rewards/chosen": 0.13459768891334534,
"rewards/margins": 0.14027482271194458,
"rewards/rejected": -0.005677163600921631,
"step": 275
},
{
"epoch": 0.75,
"learning_rate": 4.164345403899721e-07,
"logps/chosen": -70.11485290527344,
"logps/rejected": -68.45763397216797,
"loss": 0.649,
"losses/dpo": 0.6586881875991821,
"losses/sft": 1.5220152139663696,
"losses/total": 0.6586881875991821,
"ref_logps/chosen": -71.6851806640625,
"ref_logps/rejected": -68.39569091796875,
"rewards/accuracies": 0.5975000262260437,
"rewards/chosen": 0.15703237056732178,
"rewards/margins": 0.16322720050811768,
"rewards/rejected": -0.006194834131747484,
"step": 300
},
{
"epoch": 0.81,
"learning_rate": 4.04828226555246e-07,
"logps/chosen": -69.53617095947266,
"logps/rejected": -68.70401763916016,
"loss": 0.6401,
"losses/dpo": 0.6386440396308899,
"losses/sft": 1.4834158420562744,
"losses/total": 0.6386440396308899,
"ref_logps/chosen": -71.28308868408203,
"ref_logps/rejected": -68.5333480834961,
"rewards/accuracies": 0.6014999151229858,
"rewards/chosen": 0.17469124495983124,
"rewards/margins": 0.19175761938095093,
"rewards/rejected": -0.017066391184926033,
"step": 325
},
{
"epoch": 0.88,
"learning_rate": 3.9322191272051997e-07,
"logps/chosen": -69.60104370117188,
"logps/rejected": -70.08628845214844,
"loss": 0.6349,
"losses/dpo": 0.649332582950592,
"losses/sft": 1.4883217811584473,
"losses/total": 0.649332582950592,
"ref_logps/chosen": -71.48506164550781,
"ref_logps/rejected": -69.83267211914062,
"rewards/accuracies": 0.6045000553131104,
"rewards/chosen": 0.18840213119983673,
"rewards/margins": 0.2137639820575714,
"rewards/rejected": -0.025361843407154083,
"step": 350
},
{
"epoch": 0.94,
"learning_rate": 3.816155988857939e-07,
"logps/chosen": -68.25212097167969,
"logps/rejected": -69.1668930053711,
"loss": 0.6273,
"losses/dpo": 0.6162423491477966,
"losses/sft": 1.5002387762069702,
"losses/total": 0.6162423491477966,
"ref_logps/chosen": -70.23272705078125,
"ref_logps/rejected": -68.74607849121094,
"rewards/accuracies": 0.6295000314712524,
"rewards/chosen": 0.19805949926376343,
"rewards/margins": 0.24014097452163696,
"rewards/rejected": -0.04208146035671234,
"step": 375
},
{
"epoch": 1.0,
"learning_rate": 3.700092850510678e-07,
"logps/chosen": -70.95040893554688,
"logps/rejected": -72.96726989746094,
"loss": 0.6195,
"losses/dpo": 0.6564822196960449,
"losses/sft": 1.560630440711975,
"losses/total": 0.6564822196960449,
"ref_logps/chosen": -72.91325378417969,
"ref_logps/rejected": -72.14061737060547,
"rewards/accuracies": 0.6350000500679016,
"rewards/chosen": 0.19628457725048065,
"rewards/margins": 0.2789497375488281,
"rewards/rejected": -0.08266513794660568,
"step": 400
},
{
"epoch": 1.06,
"learning_rate": 3.5840297121634165e-07,
"logps/chosen": -67.00164794921875,
"logps/rejected": -69.44139099121094,
"loss": 0.6167,
"losses/dpo": 0.6204876899719238,
"losses/sft": 1.4992446899414062,
"losses/total": 0.6204876899719238,
"ref_logps/chosen": -68.97762298583984,
"ref_logps/rejected": -68.63683319091797,
"rewards/accuracies": 0.6190000772476196,
"rewards/chosen": 0.19759786128997803,
"rewards/margins": 0.27805399894714355,
"rewards/rejected": -0.08045615255832672,
"step": 425
},
{
"epoch": 1.13,
"learning_rate": 3.4679665738161556e-07,
"logps/chosen": -69.90442657470703,
"logps/rejected": -72.69268035888672,
"loss": 0.6054,
"losses/dpo": 0.598928689956665,
"losses/sft": 1.5615432262420654,
"losses/total": 0.598928689956665,
"ref_logps/chosen": -71.99002838134766,
"ref_logps/rejected": -71.58721923828125,
"rewards/accuracies": 0.6365000009536743,
"rewards/chosen": 0.20855939388275146,
"rewards/margins": 0.3191070556640625,
"rewards/rejected": -0.11054765433073044,
"step": 450
},
{
"epoch": 1.19,
"learning_rate": 3.351903435468895e-07,
"logps/chosen": -68.88660430908203,
"logps/rejected": -70.41629791259766,
"loss": 0.5879,
"losses/dpo": 0.5859370231628418,
"losses/sft": 1.5487860441207886,
"losses/total": 0.5859370231628418,
"ref_logps/chosen": -71.17853546142578,
"ref_logps/rejected": -69.02958679199219,
"rewards/accuracies": 0.6640000343322754,
"rewards/chosen": 0.22919251024723053,
"rewards/margins": 0.36786285042762756,
"rewards/rejected": -0.13867038488388062,
"step": 475
},
{
"epoch": 1.25,
"learning_rate": 3.2358402971216343e-07,
"logps/chosen": -69.55220031738281,
"logps/rejected": -70.18158721923828,
"loss": 0.6038,
"losses/dpo": 0.6201021075248718,
"losses/sft": 1.5225414037704468,
"losses/total": 0.6201021075248718,
"ref_logps/chosen": -71.74359130859375,
"ref_logps/rejected": -68.91885375976562,
"rewards/accuracies": 0.6455000042915344,
"rewards/chosen": 0.2191377878189087,
"rewards/margins": 0.34541237354278564,
"rewards/rejected": -0.12627457082271576,
"step": 500
},
{
"epoch": 1.31,
"learning_rate": 3.1197771587743734e-07,
"logps/chosen": -69.61231231689453,
"logps/rejected": -72.09597778320312,
"loss": 0.5827,
"losses/dpo": 0.5971881747245789,
"losses/sft": 1.4868888854980469,
"losses/total": 0.5971881747245789,
"ref_logps/chosen": -71.90864562988281,
"ref_logps/rejected": -70.28910827636719,
"rewards/accuracies": 0.6584999561309814,
"rewards/chosen": 0.22963352501392365,
"rewards/margins": 0.41032031178474426,
"rewards/rejected": -0.18068677186965942,
"step": 525
},
{
"epoch": 1.38,
"learning_rate": 3.003714020427112e-07,
"logps/chosen": -69.58737182617188,
"logps/rejected": -70.84317016601562,
"loss": 0.5868,
"losses/dpo": 0.5683429837226868,
"losses/sft": 1.5186117887496948,
"losses/total": 0.5683429837226868,
"ref_logps/chosen": -71.8995590209961,
"ref_logps/rejected": -68.87027740478516,
"rewards/accuracies": 0.6439999938011169,
"rewards/chosen": 0.23121845722198486,
"rewards/margins": 0.42850860953330994,
"rewards/rejected": -0.19729015231132507,
"step": 550
},
{
"epoch": 1.44,
"learning_rate": 2.887650882079851e-07,
"logps/chosen": -69.75614166259766,
"logps/rejected": -71.54226684570312,
"loss": 0.5853,
"losses/dpo": 0.573384165763855,
"losses/sft": 1.506340742111206,
"losses/total": 0.573384165763855,
"ref_logps/chosen": -71.97299194335938,
"ref_logps/rejected": -69.5394287109375,
"rewards/accuracies": 0.6439999938011169,
"rewards/chosen": 0.2216847836971283,
"rewards/margins": 0.4219689667224884,
"rewards/rejected": -0.2002841979265213,
"step": 575
},
{
"epoch": 1.5,
"learning_rate": 2.77158774373259e-07,
"logps/chosen": -67.17892456054688,
"logps/rejected": -72.1897201538086,
"loss": 0.5811,
"losses/dpo": 0.5949756503105164,
"losses/sft": 1.5656284093856812,
"losses/total": 0.5949756503105164,
"ref_logps/chosen": -69.0622329711914,
"ref_logps/rejected": -69.59004974365234,
"rewards/accuracies": 0.658500075340271,
"rewards/chosen": 0.18833142518997192,
"rewards/margins": 0.4482985734939575,
"rewards/rejected": -0.2599670886993408,
"step": 600
},
{
"epoch": 1.56,
"learning_rate": 2.65552460538533e-07,
"logps/chosen": -67.06172180175781,
"logps/rejected": -71.94342041015625,
"loss": 0.5723,
"losses/dpo": 0.55665123462677,
"losses/sft": 1.5321460962295532,
"losses/total": 0.55665123462677,
"ref_logps/chosen": -68.80352783203125,
"ref_logps/rejected": -69.0389175415039,
"rewards/accuracies": 0.6610000133514404,
"rewards/chosen": 0.17417989671230316,
"rewards/margins": 0.46463003754615784,
"rewards/rejected": -0.29045018553733826,
"step": 625
},
{
"epoch": 1.63,
"learning_rate": 2.539461467038069e-07,
"logps/chosen": -71.48672485351562,
"logps/rejected": -74.49577331542969,
"loss": 0.5551,
"losses/dpo": 0.5403231382369995,
"losses/sft": 1.5089354515075684,
"losses/total": 0.5403231382369995,
"ref_logps/chosen": -73.56059265136719,
"ref_logps/rejected": -71.07339477539062,
"rewards/accuracies": 0.6800000071525574,
"rewards/chosen": 0.20738649368286133,
"rewards/margins": 0.5496238470077515,
"rewards/rejected": -0.34223735332489014,
"step": 650
},
{
"epoch": 1.69,
"learning_rate": 2.4233983286908075e-07,
"logps/chosen": -70.11666107177734,
"logps/rejected": -72.92801666259766,
"loss": 0.5697,
"losses/dpo": 0.5605096817016602,
"losses/sft": 1.5102070569992065,
"losses/total": 0.5605096817016602,
"ref_logps/chosen": -71.93968200683594,
"ref_logps/rejected": -69.70313262939453,
"rewards/accuracies": 0.6594999432563782,
"rewards/chosen": 0.18230296671390533,
"rewards/margins": 0.5047909021377563,
"rewards/rejected": -0.32248786091804504,
"step": 675
},
{
"epoch": 1.75,
"learning_rate": 2.3073351903435469e-07,
"logps/chosen": -70.7259750366211,
"logps/rejected": -74.96146392822266,
"loss": 0.5596,
"losses/dpo": 0.5713181495666504,
"losses/sft": 1.501438021659851,
"losses/total": 0.5713181495666504,
"ref_logps/chosen": -72.50656127929688,
"ref_logps/rejected": -71.40023803710938,
"rewards/accuracies": 0.6769999861717224,
"rewards/chosen": 0.1780581921339035,
"rewards/margins": 0.5341811180114746,
"rewards/rejected": -0.3561229705810547,
"step": 700
},
{
"epoch": 1.81,
"learning_rate": 2.191272051996286e-07,
"logps/chosen": -71.7147445678711,
"logps/rejected": -74.73912048339844,
"loss": 0.5536,
"losses/dpo": 0.5598438382148743,
"losses/sft": 1.6115312576293945,
"losses/total": 0.5598438382148743,
"ref_logps/chosen": -73.46680450439453,
"ref_logps/rejected": -70.61848449707031,
"rewards/accuracies": 0.6815000772476196,
"rewards/chosen": 0.17520827054977417,
"rewards/margins": 0.5872728228569031,
"rewards/rejected": -0.4120645225048065,
"step": 725
},
{
"epoch": 1.88,
"learning_rate": 2.075208913649025e-07,
"logps/chosen": -70.7854232788086,
"logps/rejected": -74.75676727294922,
"loss": 0.55,
"losses/dpo": 0.5355216860771179,
"losses/sft": 1.5508781671524048,
"losses/total": 0.5355216860771179,
"ref_logps/chosen": -72.55353546142578,
"ref_logps/rejected": -70.52770233154297,
"rewards/accuracies": 0.6890000104904175,
"rewards/chosen": 0.1768111288547516,
"rewards/margins": 0.5997176170349121,
"rewards/rejected": -0.4229064881801605,
"step": 750
},
{
"epoch": 1.94,
"learning_rate": 1.959145775301764e-07,
"logps/chosen": -69.92965698242188,
"logps/rejected": -75.43944549560547,
"loss": 0.5567,
"losses/dpo": 0.5669773817062378,
"losses/sft": 1.5475414991378784,
"losses/total": 0.5669773817062378,
"ref_logps/chosen": -71.25016021728516,
"ref_logps/rejected": -70.77532958984375,
"rewards/accuracies": 0.6794999837875366,
"rewards/chosen": 0.13204967975616455,
"rewards/margins": 0.5984623432159424,
"rewards/rejected": -0.4664126932621002,
"step": 775
},
{
"epoch": 2.0,
"learning_rate": 1.8430826369545033e-07,
"logps/chosen": -70.1933364868164,
"logps/rejected": -74.6698226928711,
"loss": 0.5486,
"losses/dpo": 0.543175458908081,
"losses/sft": 1.5320526361465454,
"losses/total": 0.543175458908081,
"ref_logps/chosen": -71.89373016357422,
"ref_logps/rejected": -70.11508178710938,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.17003829777240753,
"rewards/margins": 0.6255122423171997,
"rewards/rejected": -0.455473929643631,
"step": 800
},
{
"epoch": 2.07,
"learning_rate": 1.7270194986072424e-07,
"logps/chosen": -68.841064453125,
"logps/rejected": -75.60282897949219,
"loss": 0.5496,
"losses/dpo": 0.5592978596687317,
"losses/sft": 1.591374158859253,
"losses/total": 0.5592978596687317,
"ref_logps/chosen": -70.10298156738281,
"ref_logps/rejected": -70.73596954345703,
"rewards/accuracies": 0.684499979019165,
"rewards/chosen": 0.12619122862815857,
"rewards/margins": 0.6128779053688049,
"rewards/rejected": -0.48668670654296875,
"step": 825
},
{
"epoch": 2.13,
"learning_rate": 1.6109563602599812e-07,
"logps/chosen": -70.97602081298828,
"logps/rejected": -75.59082794189453,
"loss": 0.5226,
"losses/dpo": 0.5137518048286438,
"losses/sft": 1.4946039915084839,
"losses/total": 0.5137518048286438,
"ref_logps/chosen": -72.67564392089844,
"ref_logps/rejected": -70.24249267578125,
"rewards/accuracies": 0.7165000438690186,
"rewards/chosen": 0.169962078332901,
"rewards/margins": 0.704794704914093,
"rewards/rejected": -0.5348325967788696,
"step": 850
},
{
"epoch": 2.19,
"learning_rate": 1.4948932219127206e-07,
"logps/chosen": -69.44739532470703,
"logps/rejected": -74.73712158203125,
"loss": 0.5483,
"losses/dpo": 0.554201602935791,
"losses/sft": 1.5423518419265747,
"losses/total": 0.554201602935791,
"ref_logps/chosen": -70.63737487792969,
"ref_logps/rejected": -69.3394546508789,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.11899794638156891,
"rewards/margins": 0.6587647795677185,
"rewards/rejected": -0.5397669076919556,
"step": 875
},
{
"epoch": 2.25,
"learning_rate": 1.3788300835654597e-07,
"logps/chosen": -67.69676971435547,
"logps/rejected": -72.41621398925781,
"loss": 0.554,
"losses/dpo": 0.536446750164032,
"losses/sft": 1.52887761592865,
"losses/total": 0.536446750164032,
"ref_logps/chosen": -68.82073974609375,
"ref_logps/rejected": -67.15235137939453,
"rewards/accuracies": 0.6720000505447388,
"rewards/chosen": 0.11239679157733917,
"rewards/margins": 0.6387830972671509,
"rewards/rejected": -0.5263863205909729,
"step": 900
},
{
"epoch": 2.32,
"learning_rate": 1.2627669452181985e-07,
"logps/chosen": -68.61820983886719,
"logps/rejected": -75.03334045410156,
"loss": 0.547,
"losses/dpo": 0.5469151139259338,
"losses/sft": 1.524001121520996,
"losses/total": 0.5469151139259338,
"ref_logps/chosen": -69.68536376953125,
"ref_logps/rejected": -69.33779907226562,
"rewards/accuracies": 0.6725000143051147,
"rewards/chosen": 0.10671478509902954,
"rewards/margins": 0.6762691736221313,
"rewards/rejected": -0.5695543885231018,
"step": 925
},
{
"epoch": 2.38,
"learning_rate": 1.1467038068709377e-07,
"logps/chosen": -71.48025512695312,
"logps/rejected": -76.9962387084961,
"loss": 0.5328,
"losses/dpo": 0.5382718443870544,
"losses/sft": 1.5356690883636475,
"losses/total": 0.5382718443870544,
"ref_logps/chosen": -72.66265869140625,
"ref_logps/rejected": -70.87562561035156,
"rewards/accuracies": 0.7055000066757202,
"rewards/chosen": 0.11823976784944534,
"rewards/margins": 0.7303012013435364,
"rewards/rejected": -0.6120614409446716,
"step": 950
},
{
"epoch": 2.44,
"learning_rate": 1.0306406685236768e-07,
"logps/chosen": -68.71895599365234,
"logps/rejected": -74.29911804199219,
"loss": 0.5524,
"losses/dpo": 0.5623547434806824,
"losses/sft": 1.6026860475540161,
"losses/total": 0.5623547434806824,
"ref_logps/chosen": -69.32998657226562,
"ref_logps/rejected": -68.43716430664062,
"rewards/accuracies": 0.6655000448226929,
"rewards/chosen": 0.06110435351729393,
"rewards/margins": 0.6473007202148438,
"rewards/rejected": -0.5861963629722595,
"step": 975
},
{
"epoch": 2.5,
"learning_rate": 9.14577530176416e-08,
"logps/chosen": -69.40322875976562,
"logps/rejected": -73.88810729980469,
"loss": 0.5513,
"losses/dpo": 0.5662988424301147,
"losses/sft": 1.6082065105438232,
"losses/total": 0.5662988424301147,
"ref_logps/chosen": -70.1698226928711,
"ref_logps/rejected": -68.06956481933594,
"rewards/accuracies": 0.6759999990463257,
"rewards/chosen": 0.07665982842445374,
"rewards/margins": 0.6585137248039246,
"rewards/rejected": -0.5818539261817932,
"step": 1000
},
{
"epoch": 2.57,
"learning_rate": 7.98514391829155e-08,
"logps/chosen": -68.32543182373047,
"logps/rejected": -74.76167297363281,
"loss": 0.5433,
"losses/dpo": 0.5388572216033936,
"losses/sft": 1.5300703048706055,
"losses/total": 0.5388572216033936,
"ref_logps/chosen": -69.11152648925781,
"ref_logps/rejected": -68.69097900390625,
"rewards/accuracies": 0.6825000047683716,
"rewards/chosen": 0.07860930263996124,
"rewards/margins": 0.685679018497467,
"rewards/rejected": -0.6070696115493774,
"step": 1025
},
{
"epoch": 2.63,
"learning_rate": 6.824512534818941e-08,
"logps/chosen": -70.73451232910156,
"logps/rejected": -77.00275421142578,
"loss": 0.5239,
"losses/dpo": 0.5082178115844727,
"losses/sft": 1.4840093851089478,
"losses/total": 0.5082178115844727,
"ref_logps/chosen": -72.08134460449219,
"ref_logps/rejected": -70.49996948242188,
"rewards/accuracies": 0.7019999623298645,
"rewards/chosen": 0.13468389213085175,
"rewards/margins": 0.7849621772766113,
"rewards/rejected": -0.6502782702445984,
"step": 1050
},
{
"epoch": 2.69,
"learning_rate": 5.6638811513463324e-08,
"logps/chosen": -69.95764923095703,
"logps/rejected": -77.90116882324219,
"loss": 0.5327,
"losses/dpo": 0.5328630805015564,
"losses/sft": 1.6418886184692383,
"losses/total": 0.5328630805015564,
"ref_logps/chosen": -70.50801086425781,
"ref_logps/rejected": -71.11058807373047,
"rewards/accuracies": 0.6959999799728394,
"rewards/chosen": 0.05503645911812782,
"rewards/margins": 0.7340949773788452,
"rewards/rejected": -0.6790586113929749,
"step": 1075
},
{
"epoch": 2.75,
"learning_rate": 4.503249767873723e-08,
"logps/chosen": -69.83995819091797,
"logps/rejected": -75.7170639038086,
"loss": 0.5415,
"losses/dpo": 0.5642114281654358,
"losses/sft": 1.5595824718475342,
"losses/total": 0.5642114281654358,
"ref_logps/chosen": -70.96809387207031,
"ref_logps/rejected": -69.68138885498047,
"rewards/accuracies": 0.6990000009536743,
"rewards/chosen": 0.11281368136405945,
"rewards/margins": 0.7163800001144409,
"rewards/rejected": -0.6035662889480591,
"step": 1100
},
{
"epoch": 2.82,
"learning_rate": 3.3426183844011144e-08,
"logps/chosen": -72.0064697265625,
"logps/rejected": -75.64459228515625,
"loss": 0.553,
"losses/dpo": 0.6136656403541565,
"losses/sft": 1.6066731214523315,
"losses/total": 0.6136656403541565,
"ref_logps/chosen": -72.82428741455078,
"ref_logps/rejected": -69.51007843017578,
"rewards/accuracies": 0.6880000233650208,
"rewards/chosen": 0.08178197592496872,
"rewards/margins": 0.6952335834503174,
"rewards/rejected": -0.6134517192840576,
"step": 1125
},
{
"epoch": 2.88,
"learning_rate": 2.181987000928505e-08,
"logps/chosen": -71.19115447998047,
"logps/rejected": -74.97571563720703,
"loss": 0.5495,
"losses/dpo": 0.5555659532546997,
"losses/sft": 1.547566533088684,
"losses/total": 0.5555659532546997,
"ref_logps/chosen": -72.04399108886719,
"ref_logps/rejected": -68.92461395263672,
"rewards/accuracies": 0.6770000457763672,
"rewards/chosen": 0.08528263866901398,
"rewards/margins": 0.6903927326202393,
"rewards/rejected": -0.6051101088523865,
"step": 1150
},
{
"epoch": 2.94,
"learning_rate": 1.0213556174558959e-08,
"logps/chosen": -68.98538970947266,
"logps/rejected": -74.58392333984375,
"loss": 0.5254,
"losses/dpo": 0.49954432249069214,
"losses/sft": 1.4814612865447998,
"losses/total": 0.49954432249069214,
"ref_logps/chosen": -70.0972671508789,
"ref_logps/rejected": -68.21308898925781,
"rewards/accuracies": 0.6914999485015869,
"rewards/chosen": 0.11118759214878082,
"rewards/margins": 0.7482713460922241,
"rewards/rejected": -0.6370838284492493,
"step": 1175
},
{
"epoch": 3.0,
"step": 1197,
"total_flos": 0.0,
"train_loss": 0.5977537606095112,
"train_runtime": 15731.8751,
"train_samples_per_second": 6.094,
"train_steps_per_second": 0.076
}
],
"logging_steps": 25,
"max_steps": 1197,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}