Qwen2.5-Coder-32B-Instruct-edit / trainer_state.json
Kaylee0501's picture
Upload folder using huggingface_hub
c53fcfc verified
{
"best_global_step": 98,
"best_metric": 0.14128435,
"best_model_checkpoint": "./output_dpo/v0-20260226-085120/checkpoint-98",
"epoch": 1.9861635220125786,
"eval_steps": 50,
"global_step": 98,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02012578616352201,
"grad_norm": 1.4377635717391968,
"learning_rate": 2e-05,
"logits/chosen": -1.7360858917236328,
"logits/rejected": -1.7113451957702637,
"logps/chosen": -111.01881408691406,
"logps/rejected": -147.11973571777344,
"loss": 1.319612741470337,
"memory(GiB)": 239.65,
"nll_loss": 0.6264656782150269,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1,
"train_speed(iter/s)": 0.004523
},
{
"epoch": 0.10062893081761007,
"grad_norm": 1.4525984525680542,
"learning_rate": 0.0001,
"logits/chosen": -1.6965384483337402,
"logits/rejected": -1.681287407875061,
"logps/chosen": -111.46014404296875,
"logps/rejected": -143.75,
"loss": 1.361119270324707,
"memory(GiB)": 239.65,
"nll_loss": 0.6818519830703735,
"rewards/accuracies": 0.5078125,
"rewards/chosen": 0.04633765667676926,
"rewards/margins": 0.02939797379076481,
"rewards/rejected": 0.01693967543542385,
"step": 5,
"train_speed(iter/s)": 0.003936
},
{
"epoch": 0.20125786163522014,
"grad_norm": 1.0078742504119873,
"learning_rate": 9.928848976574019e-05,
"logits/chosen": -1.7403156757354736,
"logits/rejected": -1.726575255393982,
"logps/chosen": -92.17589569091797,
"logps/rejected": -137.906005859375,
"loss": 0.9127995491027832,
"memory(GiB)": 284.39,
"nll_loss": 0.5469792485237122,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 2.0497705936431885,
"rewards/margins": 1.318472146987915,
"rewards/rejected": 0.7312980890274048,
"step": 10,
"train_speed(iter/s)": 0.003899
},
{
"epoch": 0.3018867924528302,
"grad_norm": 1.1189488172531128,
"learning_rate": 9.717420893549902e-05,
"logits/chosen": -1.8927457332611084,
"logits/rejected": -1.8742872476577759,
"logps/chosen": -56.06190872192383,
"logps/rejected": -129.63563537597656,
"loss": 0.5759311199188233,
"memory(GiB)": 284.39,
"nll_loss": 0.3772023618221283,
"rewards/accuracies": 0.96875,
"rewards/chosen": 5.639416694641113,
"rewards/margins": 3.9854512214660645,
"rewards/rejected": 1.6539649963378906,
"step": 15,
"train_speed(iter/s)": 0.003905
},
{
"epoch": 0.4025157232704403,
"grad_norm": 2.065215826034546,
"learning_rate": 9.371733080722911e-05,
"logits/chosen": -2.0726945400238037,
"logits/rejected": -2.0517024993896484,
"logps/chosen": -38.782867431640625,
"logps/rejected": -141.28872680664062,
"loss": 0.34540715217590334,
"memory(GiB)": 284.39,
"nll_loss": 0.24602404236793518,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 7.165956020355225,
"rewards/margins": 7.171680450439453,
"rewards/rejected": -0.005724119953811169,
"step": 20,
"train_speed(iter/s)": 0.003816
},
{
"epoch": 0.5031446540880503,
"grad_norm": 0.9638963937759399,
"learning_rate": 8.90162395476046e-05,
"logits/chosen": -2.205498456954956,
"logits/rejected": -2.182650089263916,
"logps/chosen": -34.5748405456543,
"logps/rejected": -168.0699462890625,
"loss": 0.29475107192993166,
"memory(GiB)": 284.39,
"nll_loss": 0.2225954234600067,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 7.66351842880249,
"rewards/margins": 9.37935733795166,
"rewards/rejected": -1.7158397436141968,
"step": 25,
"train_speed(iter/s)": 0.00375
},
{
"epoch": 0.6037735849056604,
"grad_norm": 0.5872039794921875,
"learning_rate": 8.320473013836196e-05,
"logits/chosen": -2.2474639415740967,
"logits/rejected": -2.2216179370880127,
"logps/chosen": -23.524024963378906,
"logps/rejected": -159.84942626953125,
"loss": 0.23147854804992676,
"memory(GiB)": 284.39,
"nll_loss": 0.18826261162757874,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 8.869623184204102,
"rewards/margins": 10.225828170776367,
"rewards/rejected": -1.3562055826187134,
"step": 30,
"train_speed(iter/s)": 0.003835
},
{
"epoch": 0.7044025157232704,
"grad_norm": 0.8212366700172424,
"learning_rate": 7.644820051634812e-05,
"logits/chosen": -2.2804150581359863,
"logits/rejected": -2.2608768939971924,
"logps/chosen": -20.996126174926758,
"logps/rejected": -161.36029052734375,
"loss": 0.1881607413291931,
"memory(GiB)": 284.39,
"nll_loss": 0.13474711775779724,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 9.420888900756836,
"rewards/margins": 10.558382987976074,
"rewards/rejected": -1.137495517730713,
"step": 35,
"train_speed(iter/s)": 0.003906
},
{
"epoch": 0.8050314465408805,
"grad_norm": 0.9303659200668335,
"learning_rate": 6.89389442805288e-05,
"logits/chosen": -2.2562363147735596,
"logits/rejected": -2.2325804233551025,
"logps/chosen": -26.601587295532227,
"logps/rejected": -155.21389770507812,
"loss": 0.21106297969818116,
"memory(GiB)": 284.39,
"nll_loss": 0.15431135892868042,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 9.138971328735352,
"rewards/margins": 9.48228931427002,
"rewards/rejected": -0.34331730008125305,
"step": 40,
"train_speed(iter/s)": 0.003892
},
{
"epoch": 0.9056603773584906,
"grad_norm": 0.8759572505950928,
"learning_rate": 6.0890677937442574e-05,
"logits/chosen": -2.2504515647888184,
"logits/rejected": -2.236832618713379,
"logps/chosen": -24.932228088378906,
"logps/rejected": -150.9632110595703,
"loss": 0.21578831672668458,
"memory(GiB)": 284.39,
"nll_loss": 0.1573367863893509,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.940786361694336,
"rewards/margins": 9.429086685180664,
"rewards/rejected": -0.488300621509552,
"step": 45,
"train_speed(iter/s)": 0.003836
},
{
"epoch": 1.020125786163522,
"grad_norm": 1.6238784790039062,
"learning_rate": 5.2532458441935636e-05,
"logits/chosen": -2.3447046279907227,
"logits/rejected": -2.316112995147705,
"logps/chosen": -17.97600746154785,
"logps/rejected": -169.5856475830078,
"loss": 0.1865710735321045,
"memory(GiB)": 284.39,
"nll_loss": 0.1159815713763237,
"rewards/accuracies": 0.9767441749572754,
"rewards/chosen": 9.204967498779297,
"rewards/margins": 11.411535263061523,
"rewards/rejected": -2.2065672874450684,
"step": 50,
"train_speed(iter/s)": 0.003798
},
{
"epoch": 1.020125786163522,
"eval_logits/chosen": -2.462606191635132,
"eval_logits/rejected": -2.437251091003418,
"eval_logps/chosen": -19.061992645263672,
"eval_logps/rejected": -184.38104248046875,
"eval_loss": 0.1830219328403473,
"eval_nll_loss": 0.17293420433998108,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 8.904085159301758,
"eval_rewards/margins": 12.307174682617188,
"eval_rewards/rejected": -3.4030885696411133,
"eval_runtime": 55.6446,
"eval_samples_per_second": 0.288,
"eval_steps_per_second": 0.144,
"step": 50
},
{
"epoch": 1.120754716981132,
"grad_norm": 0.5176746249198914,
"learning_rate": 4.410216414245771e-05,
"logits/chosen": -2.3740134239196777,
"logits/rejected": -2.3573694229125977,
"logps/chosen": -26.2227840423584,
"logps/rejected": -179.9822540283203,
"loss": 0.19258421659469604,
"memory(GiB)": 284.39,
"nll_loss": 0.17000555992126465,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.813023567199707,
"rewards/margins": 11.842904090881348,
"rewards/rejected": -3.029881715774536,
"step": 55,
"train_speed(iter/s)": 0.00372
},
{
"epoch": 1.221383647798742,
"grad_norm": 0.6022250056266785,
"learning_rate": 3.58397246658848e-05,
"logits/chosen": -2.4972939491271973,
"logits/rejected": -2.4699082374572754,
"logps/chosen": -14.000228881835938,
"logps/rejected": -196.9097442626953,
"loss": 0.10635790824890137,
"memory(GiB)": 284.39,
"nll_loss": 0.08761530369520187,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.721292495727539,
"rewards/margins": 14.493858337402344,
"rewards/rejected": -4.772566795349121,
"step": 60,
"train_speed(iter/s)": 0.003743
},
{
"epoch": 1.3220125786163521,
"grad_norm": 0.2501760721206665,
"learning_rate": 2.798029242211828e-05,
"logits/chosen": -2.5347957611083984,
"logits/rejected": -2.50445818901062,
"logps/chosen": -23.887548446655273,
"logps/rejected": -183.65591430664062,
"loss": 0.18030774593353271,
"memory(GiB)": 284.39,
"nll_loss": 0.14212127029895782,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 9.50097370147705,
"rewards/margins": 12.938058853149414,
"rewards/rejected": -3.4370861053466797,
"step": 65,
"train_speed(iter/s)": 0.003757
},
{
"epoch": 1.4226415094339622,
"grad_norm": 0.42134493589401245,
"learning_rate": 2.074755007023461e-05,
"logits/chosen": -2.5006675720214844,
"logits/rejected": -2.478884220123291,
"logps/chosen": -12.177281379699707,
"logps/rejected": -190.2030487060547,
"loss": 0.09010829329490662,
"memory(GiB)": 284.39,
"nll_loss": 0.07332514226436615,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 10.052157402038574,
"rewards/margins": 13.963111877441406,
"rewards/rejected": -3.910953998565674,
"step": 70,
"train_speed(iter/s)": 0.003774
},
{
"epoch": 1.5232704402515722,
"grad_norm": 0.5933993458747864,
"learning_rate": 1.434734441843899e-05,
"logits/chosen": -2.502887487411499,
"logits/rejected": -2.486396551132202,
"logps/chosen": -18.57794189453125,
"logps/rejected": -170.333740234375,
"loss": 0.13938431739807128,
"memory(GiB)": 284.39,
"nll_loss": 0.11240720748901367,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.312703132629395,
"rewards/margins": 11.638362884521484,
"rewards/rejected": -2.325660467147827,
"step": 75,
"train_speed(iter/s)": 0.00376
},
{
"epoch": 1.6238993710691823,
"grad_norm": 0.2634561061859131,
"learning_rate": 8.961827939636196e-06,
"logits/chosen": -2.5577776432037354,
"logits/rejected": -2.5379796028137207,
"logps/chosen": -16.603967666625977,
"logps/rejected": -171.06466674804688,
"loss": 0.10857141017913818,
"memory(GiB)": 284.39,
"nll_loss": 0.09158992022275925,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.913006782531738,
"rewards/margins": 12.598286628723145,
"rewards/rejected": -2.685279369354248,
"step": 80,
"train_speed(iter/s)": 0.003778
},
{
"epoch": 1.7245283018867923,
"grad_norm": 0.3385748267173767,
"learning_rate": 4.744274637483936e-06,
"logits/chosen": -2.562164783477783,
"logits/rejected": -2.5376689434051514,
"logps/chosen": -14.094012260437012,
"logps/rejected": -163.73416137695312,
"loss": 0.11240246295928955,
"memory(GiB)": 284.39,
"nll_loss": 0.09068052470684052,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 9.352496147155762,
"rewards/margins": 11.771881103515625,
"rewards/rejected": -2.419384479522705,
"step": 85,
"train_speed(iter/s)": 0.003777
},
{
"epoch": 1.8251572327044026,
"grad_norm": 0.3210693895816803,
"learning_rate": 1.8147178055029579e-06,
"logits/chosen": -2.602306842803955,
"logits/rejected": -2.567457675933838,
"logps/chosen": -17.956844329833984,
"logps/rejected": -175.5157470703125,
"loss": 0.11938213109970093,
"memory(GiB)": 284.39,
"nll_loss": 0.09758913516998291,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 9.622145652770996,
"rewards/margins": 12.495707511901855,
"rewards/rejected": -2.873561382293701,
"step": 90,
"train_speed(iter/s)": 0.003796
},
{
"epoch": 1.9257861635220126,
"grad_norm": 0.3329070210456848,
"learning_rate": 2.5653383040524227e-07,
"logits/chosen": -2.591177463531494,
"logits/rejected": -2.568394422531128,
"logps/chosen": -17.04227638244629,
"logps/rejected": -188.1129913330078,
"loss": 0.11813113689422608,
"memory(GiB)": 284.39,
"nll_loss": 0.10378739982843399,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.477154731750488,
"rewards/margins": 13.704524040222168,
"rewards/rejected": -4.227367877960205,
"step": 95,
"train_speed(iter/s)": 0.003803
},
{
"epoch": 1.9861635220125786,
"eval_logits/chosen": -2.6464767456054688,
"eval_logits/rejected": -2.6153650283813477,
"eval_logps/chosen": -15.376700401306152,
"eval_logps/rejected": -193.30332946777344,
"eval_loss": 0.14128434658050537,
"eval_nll_loss": 0.14024823904037476,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 9.272613525390625,
"eval_rewards/margins": 13.567930221557617,
"eval_rewards/rejected": -4.295315742492676,
"eval_runtime": 55.5933,
"eval_samples_per_second": 0.288,
"eval_steps_per_second": 0.144,
"step": 98
}
],
"logging_steps": 5,
"max_steps": 98,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.261229460544324e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}