sample_model_llama3.2_sft_dpo / trainer_state.json
Cherran's picture
Upload folder using huggingface_hub
54b43fc verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.20276497695852536,
"eval_steps": 5,
"global_step": 44,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02304147465437788,
"eval_logits/chosen": -1.1632846593856812,
"eval_logits/rejected": -0.8827418088912964,
"eval_logps/chosen": -731.2137451171875,
"eval_logps/rejected": -465.1360778808594,
"eval_loss": 0.7025490403175354,
"eval_rewards/accuracies": 0.625,
"eval_rewards/chosen": 0.08797025680541992,
"eval_rewards/margins": 0.07403016090393066,
"eval_rewards/rejected": 0.013940095901489258,
"eval_runtime": 2.5482,
"eval_samples_per_second": 9.811,
"eval_steps_per_second": 1.57,
"step": 5
},
{
"epoch": 0.04608294930875576,
"grad_norm": 86.91683959960938,
"learning_rate": 4.799948609147061e-07,
"logits/chosen": -1.066173791885376,
"logits/rejected": -0.9449604749679565,
"logps/chosen": -674.0364379882812,
"logps/rejected": -356.2140808105469,
"loss": 0.777,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.1258804351091385,
"rewards/margins": -0.14030227065086365,
"rewards/rejected": 0.014421844854950905,
"step": 10
},
{
"epoch": 0.04608294930875576,
"eval_logits/chosen": -1.1648622751235962,
"eval_logits/rejected": -0.8837531208992004,
"eval_logps/chosen": -731.4483642578125,
"eval_logps/rejected": -465.3337707519531,
"eval_loss": 0.7411171197891235,
"eval_rewards/accuracies": 0.625,
"eval_rewards/chosen": -0.029358863830566406,
"eval_rewards/margins": 0.055533647537231445,
"eval_rewards/rejected": -0.08489251136779785,
"eval_runtime": 2.2132,
"eval_samples_per_second": 11.296,
"eval_steps_per_second": 1.807,
"step": 10
},
{
"epoch": 0.06912442396313365,
"eval_logits/chosen": -1.1638308763504028,
"eval_logits/rejected": -0.8826640844345093,
"eval_logps/chosen": -731.311767578125,
"eval_logps/rejected": -465.1154479980469,
"eval_loss": 0.7261512279510498,
"eval_rewards/accuracies": 0.625,
"eval_rewards/chosen": 0.03898191452026367,
"eval_rewards/margins": 0.01471400260925293,
"eval_rewards/rejected": 0.024267911911010742,
"eval_runtime": 2.2289,
"eval_samples_per_second": 11.216,
"eval_steps_per_second": 1.795,
"step": 15
},
{
"epoch": 0.09216589861751152,
"grad_norm": 133.0209197998047,
"learning_rate": 3.3865122176063385e-07,
"logits/chosen": -1.1327978372573853,
"logits/rejected": -0.9894822239875793,
"logps/chosen": -718.1174926757812,
"logps/rejected": -418.946533203125,
"loss": 0.685,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": 0.0425872802734375,
"rewards/margins": 0.05570220947265625,
"rewards/rejected": -0.01311492919921875,
"step": 20
},
{
"epoch": 0.09216589861751152,
"eval_logits/chosen": -1.165111780166626,
"eval_logits/rejected": -0.8841784000396729,
"eval_logps/chosen": -731.49267578125,
"eval_logps/rejected": -465.2555847167969,
"eval_loss": 0.7530465722084045,
"eval_rewards/accuracies": 0.53125,
"eval_rewards/chosen": -0.0514984130859375,
"eval_rewards/margins": -0.005685091018676758,
"eval_rewards/rejected": -0.04581332206726074,
"eval_runtime": 2.2284,
"eval_samples_per_second": 11.219,
"eval_steps_per_second": 1.795,
"step": 20
},
{
"epoch": 0.1152073732718894,
"eval_logits/chosen": -1.1639286279678345,
"eval_logits/rejected": -0.8834071159362793,
"eval_logps/chosen": -731.60693359375,
"eval_logps/rejected": -465.266845703125,
"eval_loss": 0.7450304627418518,
"eval_rewards/accuracies": 0.3125,
"eval_rewards/chosen": -0.10858917236328125,
"eval_rewards/margins": -0.05713796615600586,
"eval_rewards/rejected": -0.05145120620727539,
"eval_runtime": 2.2254,
"eval_samples_per_second": 11.234,
"eval_steps_per_second": 1.797,
"step": 25
},
{
"epoch": 0.1382488479262673,
"grad_norm": 112.3116683959961,
"learning_rate": 1.428268596492364e-07,
"logits/chosen": -1.0476138591766357,
"logits/rejected": -0.9094411730766296,
"logps/chosen": -582.4729614257812,
"logps/rejected": -353.1932678222656,
"loss": 0.7665,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.1044158935546875,
"rewards/margins": -0.09558334201574326,
"rewards/rejected": -0.008832549676299095,
"step": 30
},
{
"epoch": 0.1382488479262673,
"eval_logits/chosen": -1.1638270616531372,
"eval_logits/rejected": -0.8826746344566345,
"eval_logps/chosen": -731.166259765625,
"eval_logps/rejected": -465.258544921875,
"eval_loss": 0.6754930019378662,
"eval_rewards/accuracies": 0.65625,
"eval_rewards/chosen": 0.11172008514404297,
"eval_rewards/margins": 0.1590101718902588,
"eval_rewards/rejected": -0.04729008674621582,
"eval_runtime": 2.2212,
"eval_samples_per_second": 11.255,
"eval_steps_per_second": 1.801,
"step": 30
},
{
"epoch": 0.16129032258064516,
"eval_logits/chosen": -1.1637563705444336,
"eval_logits/rejected": -0.882408857345581,
"eval_logps/chosen": -731.4846801757812,
"eval_logps/rejected": -465.2984313964844,
"eval_loss": 0.718708336353302,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.04746055603027344,
"eval_rewards/margins": 0.01976180076599121,
"eval_rewards/rejected": -0.06722235679626465,
"eval_runtime": 2.2257,
"eval_samples_per_second": 11.232,
"eval_steps_per_second": 1.797,
"step": 35
},
{
"epoch": 0.18433179723502305,
"grad_norm": 121.56043243408203,
"learning_rate": 1.2865889513213628e-08,
"logits/chosen": -1.1619962453842163,
"logits/rejected": -0.9679350852966309,
"logps/chosen": -642.6929931640625,
"logps/rejected": -397.54107666015625,
"loss": 0.7523,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.057281494140625,
"rewards/margins": -0.09049377590417862,
"rewards/rejected": 0.03321228176355362,
"step": 40
},
{
"epoch": 0.18433179723502305,
"eval_logits/chosen": -1.164229393005371,
"eval_logits/rejected": -0.8833534717559814,
"eval_logps/chosen": -731.2298583984375,
"eval_logps/rejected": -465.13787841796875,
"eval_loss": 0.6944708228111267,
"eval_rewards/accuracies": 0.59375,
"eval_rewards/chosen": 0.07993173599243164,
"eval_rewards/margins": 0.06688284873962402,
"eval_rewards/rejected": 0.013048887252807617,
"eval_runtime": 2.225,
"eval_samples_per_second": 11.236,
"eval_steps_per_second": 1.798,
"step": 40
}
],
"logging_steps": 10,
"max_steps": 44,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}