p-vector / checkpoint-100 /trainer_state.json
saranshagarwal2020's picture
Upload folder using huggingface_hub
f3a5828 verified
{
"best_global_step": 100,
"best_metric": 0.6752368807792664,
"best_model_checkpoint": "models/dpo_fft_LFM2.5-1.2B-Instruct_argilla__distilabel-math-preference-dpo_20260222_210527/checkpoint-100",
"epoch": 0.6956521739130435,
"eval_steps": 100,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06956521739130435,
"grad_norm": 87.0,
"learning_rate": 4.090909090909091e-07,
"logits/chosen": -1.0601829290390015,
"logits/rejected": -1.0425456762313843,
"logps/chosen": -332.2013244628906,
"logps/rejected": -333.1183776855469,
"loss": 0.6823273181915284,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": 0.015406012535095215,
"rewards/margins": 0.03173117712140083,
"rewards/rejected": -0.01632516458630562,
"step": 10
},
{
"epoch": 0.1391304347826087,
"grad_norm": 98.5,
"learning_rate": 8.636363636363636e-07,
"logits/chosen": -1.0965769290924072,
"logits/rejected": -1.0956510305404663,
"logps/chosen": -328.796875,
"logps/rejected": -312.0242919921875,
"loss": 0.6926839828491211,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.093757264316082,
"rewards/margins": 0.013911411166191101,
"rewards/rejected": 0.0798458456993103,
"step": 20
},
{
"epoch": 0.20869565217391303,
"grad_norm": 96.5,
"learning_rate": 1.318181818181818e-06,
"logits/chosen": -1.1252676248550415,
"logits/rejected": -1.1598210334777832,
"logps/chosen": -326.04327392578125,
"logps/rejected": -303.9259033203125,
"loss": 0.7117842674255371,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": 0.48031529784202576,
"rewards/margins": -0.010448494926095009,
"rewards/rejected": 0.49076375365257263,
"step": 30
},
{
"epoch": 0.2782608695652174,
"grad_norm": 109.0,
"learning_rate": 1.7727272727272727e-06,
"logits/chosen": -1.0572926998138428,
"logits/rejected": -1.069678544998169,
"logps/chosen": -333.5104064941406,
"logps/rejected": -322.76116943359375,
"loss": 0.721163272857666,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": 1.2552604675292969,
"rewards/margins": 0.020199721679091454,
"rewards/rejected": 1.2350608110427856,
"step": 40
},
{
"epoch": 0.34782608695652173,
"grad_norm": 106.0,
"learning_rate": 1.99918061692433e-06,
"logits/chosen": -1.116310954093933,
"logits/rejected": -1.126555323600769,
"logps/chosen": -325.90625,
"logps/rejected": -320.7261047363281,
"loss": 0.7112587451934814,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.9580303430557251,
"rewards/margins": 0.02043265663087368,
"rewards/rejected": 0.9375975728034973,
"step": 50
},
{
"epoch": 0.41739130434782606,
"grad_norm": 134.0,
"learning_rate": 1.992633606781968e-06,
"logits/chosen": -1.0915653705596924,
"logits/rejected": -1.0714164972305298,
"logps/chosen": -335.96258544921875,
"logps/rejected": -329.37567138671875,
"loss": 0.6888086795806885,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": 0.24013535678386688,
"rewards/margins": 0.025822216644883156,
"rewards/rejected": 0.21431314945220947,
"step": 60
},
{
"epoch": 0.48695652173913045,
"grad_norm": 168.0,
"learning_rate": 1.9795824849893477e-06,
"logits/chosen": -1.124298334121704,
"logits/rejected": -1.1153584718704224,
"logps/chosen": -319.74371337890625,
"logps/rejected": -317.81964111328125,
"loss": 0.7498865127563477,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": 0.3042285442352295,
"rewards/margins": -0.07379330694675446,
"rewards/rejected": 0.37802186608314514,
"step": 70
},
{
"epoch": 0.5565217391304348,
"grad_norm": 93.5,
"learning_rate": 1.960112767443493e-06,
"logits/chosen": -1.1165910959243774,
"logits/rejected": -1.1083123683929443,
"logps/chosen": -314.81610107421875,
"logps/rejected": -312.41070556640625,
"loss": 0.67913818359375,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.3251148760318756,
"rewards/margins": 0.07726944983005524,
"rewards/rejected": 0.24784541130065918,
"step": 80
},
{
"epoch": 0.6260869565217392,
"grad_norm": 97.5,
"learning_rate": 1.9343520271137762e-06,
"logits/chosen": -1.0576120615005493,
"logits/rejected": -1.0416970252990723,
"logps/chosen": -333.35565185546875,
"logps/rejected": -329.2746276855469,
"loss": 0.6899321556091309,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 1.0298190116882324,
"rewards/margins": 0.062107719480991364,
"rewards/rejected": 0.967711329460144,
"step": 90
},
{
"epoch": 0.6956521739130435,
"grad_norm": 106.0,
"learning_rate": 1.9024690581354698e-06,
"logits/chosen": -1.0332655906677246,
"logits/rejected": -1.0259943008422852,
"logps/chosen": -327.9278564453125,
"logps/rejected": -320.8587951660156,
"loss": 0.6782574653625488,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 1.1065479516983032,
"rewards/margins": 0.09923191368579865,
"rewards/rejected": 1.007315993309021,
"step": 100
},
{
"epoch": 0.6956521739130435,
"eval_logits/chosen": -1.065671682357788,
"eval_logits/rejected": -1.0876761674880981,
"eval_logps/chosen": -315.0599670410156,
"eval_logps/rejected": -316.6776123046875,
"eval_loss": 0.6752368807792664,
"eval_rewards/accuracies": 0.5887096524238586,
"eval_rewards/chosen": 0.8812527060508728,
"eval_rewards/margins": 0.13870203495025635,
"eval_rewards/rejected": 0.7425506114959717,
"eval_runtime": 11.3291,
"eval_samples_per_second": 10.68,
"eval_steps_per_second": 2.736,
"step": 100
}
],
"logging_steps": 10,
"max_steps": 432,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}