aip_sft_dpo_gemma12b_100 / trainer_state.json
cackerman's picture
Upload folder using huggingface_hub
c7c4030 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9937888198757764,
"eval_steps": 20,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09937888198757763,
"grad_norm": 23.625,
"learning_rate": 4.967291771834727e-06,
"logits/chosen": -1.450693130493164,
"logits/rejected": -1.4506696462631226,
"logps/chosen": -300.4499206542969,
"logps/rejected": -581.7461547851562,
"loss": 0.5136,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.27882489562034607,
"rewards/margins": 1.4301296472549438,
"rewards/rejected": -1.7089545726776123,
"step": 10
},
{
"epoch": 0.19875776397515527,
"grad_norm": 31.625,
"learning_rate": 4.710738726825059e-06,
"logits/chosen": -1.3673866987228394,
"logits/rejected": -1.3606162071228027,
"logps/chosen": -336.33209228515625,
"logps/rejected": -559.6798706054688,
"loss": 0.247,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -1.2435611486434937,
"rewards/margins": 3.834404468536377,
"rewards/rejected": -5.07796573638916,
"step": 20
},
{
"epoch": 0.19875776397515527,
"eval_logits/chosen": -1.3263726234436035,
"eval_logits/rejected": -1.311964988708496,
"eval_logps/chosen": -339.557861328125,
"eval_logps/rejected": -699.66259765625,
"eval_loss": 0.1816757321357727,
"eval_rewards/accuracies": 0.9800000190734863,
"eval_rewards/chosen": -1.7551236152648926,
"eval_rewards/margins": 7.074146270751953,
"eval_rewards/rejected": -8.829269409179688,
"eval_runtime": 16.2646,
"eval_samples_per_second": 3.074,
"eval_steps_per_second": 3.074,
"step": 20
},
{
"epoch": 0.2981366459627329,
"grad_norm": 48.5,
"learning_rate": 4.224309443195261e-06,
"logits/chosen": -1.2823044061660767,
"logits/rejected": -1.2803713083267212,
"logps/chosen": -333.9573669433594,
"logps/rejected": -675.1748046875,
"loss": 0.19,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -2.4483742713928223,
"rewards/margins": 7.259348392486572,
"rewards/rejected": -9.707722663879395,
"step": 30
},
{
"epoch": 0.39751552795031053,
"grad_norm": 21.625,
"learning_rate": 3.5585836356437266e-06,
"logits/chosen": -1.260244369506836,
"logits/rejected": -1.258673071861267,
"logps/chosen": -440.227783203125,
"logps/rejected": -945.3692626953125,
"loss": 0.0878,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -4.9525017738342285,
"rewards/margins": 12.335118293762207,
"rewards/rejected": -17.28761863708496,
"step": 40
},
{
"epoch": 0.39751552795031053,
"eval_logits/chosen": -1.2100083827972412,
"eval_logits/rejected": -1.2017772197723389,
"eval_logps/chosen": -406.1869201660156,
"eval_logps/rejected": -921.7108764648438,
"eval_loss": 0.0693456158041954,
"eval_rewards/accuracies": 0.9800000190734863,
"eval_rewards/chosen": -5.086578845977783,
"eval_rewards/margins": 14.845105171203613,
"eval_rewards/rejected": -19.931682586669922,
"eval_runtime": 16.3825,
"eval_samples_per_second": 3.052,
"eval_steps_per_second": 3.052,
"step": 40
},
{
"epoch": 0.4968944099378882,
"grad_norm": 14.375,
"learning_rate": 2.7827845654882112e-06,
"logits/chosen": -1.1697198152542114,
"logits/rejected": -1.174869418144226,
"logps/chosen": -406.47735595703125,
"logps/rejected": -870.3468627929688,
"loss": 0.0755,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -6.050213813781738,
"rewards/margins": 12.730817794799805,
"rewards/rejected": -18.78103256225586,
"step": 50
},
{
"epoch": 0.5962732919254659,
"grad_norm": 8.6875,
"learning_rate": 1.9775810924218126e-06,
"logits/chosen": -1.1027421951293945,
"logits/rejected": -1.1014692783355713,
"logps/chosen": -458.50201416015625,
"logps/rejected": -897.8450927734375,
"loss": 0.0552,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.981372833251953,
"rewards/margins": 14.360372543334961,
"rewards/rejected": -22.341745376586914,
"step": 60
},
{
"epoch": 0.5962732919254659,
"eval_logits/chosen": -1.0882357358932495,
"eval_logits/rejected": -1.088262677192688,
"eval_logps/chosen": -476.73382568359375,
"eval_logps/rejected": -1135.177734375,
"eval_loss": 0.037782467901706696,
"eval_rewards/accuracies": 0.9800000190734863,
"eval_rewards/chosen": -8.613921165466309,
"eval_rewards/margins": 21.991100311279297,
"eval_rewards/rejected": -30.605024337768555,
"eval_runtime": 16.2315,
"eval_samples_per_second": 3.08,
"eval_steps_per_second": 3.08,
"step": 60
},
{
"epoch": 0.6956521739130435,
"grad_norm": 8.1875,
"learning_rate": 1.2266995941780934e-06,
"logits/chosen": -1.077918291091919,
"logits/rejected": -1.073142170906067,
"logps/chosen": -450.880859375,
"logps/rejected": -798.1456298828125,
"loss": 0.0748,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -8.840319633483887,
"rewards/margins": 12.28600025177002,
"rewards/rejected": -21.12632179260254,
"step": 70
},
{
"epoch": 0.7950310559006211,
"grad_norm": 13.3125,
"learning_rate": 6.082179604557617e-07,
"logits/chosen": -1.0586068630218506,
"logits/rejected": -1.0654761791229248,
"logps/chosen": -583.5306396484375,
"logps/rejected": -1094.6214599609375,
"loss": 0.0361,
"rewards/accuracies": 1.0,
"rewards/chosen": -11.453761100769043,
"rewards/margins": 17.418289184570312,
"rewards/rejected": -28.87204933166504,
"step": 80
},
{
"epoch": 0.7950310559006211,
"eval_logits/chosen": -1.056160569190979,
"eval_logits/rejected": -1.0596200227737427,
"eval_logps/chosen": -494.7749938964844,
"eval_logps/rejected": -1187.513427734375,
"eval_loss": 0.03448532894253731,
"eval_rewards/accuracies": 0.9800000190734863,
"eval_rewards/chosen": -9.515981674194336,
"eval_rewards/margins": 23.70583724975586,
"eval_rewards/rejected": -33.22181701660156,
"eval_runtime": 16.2498,
"eval_samples_per_second": 3.077,
"eval_steps_per_second": 3.077,
"step": 80
},
{
"epoch": 0.8944099378881988,
"grad_norm": 10.8125,
"learning_rate": 1.864469258932397e-07,
"logits/chosen": -1.0640597343444824,
"logits/rejected": -1.0643057823181152,
"logps/chosen": -545.856689453125,
"logps/rejected": -1039.5836181640625,
"loss": 0.0496,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -10.582590103149414,
"rewards/margins": 17.025867462158203,
"rewards/rejected": -27.608455657958984,
"step": 90
},
{
"epoch": 0.9937888198757764,
"grad_norm": 16.75,
"learning_rate": 5.242934405720879e-09,
"logits/chosen": -1.025246500968933,
"logits/rejected": -1.0300629138946533,
"logps/chosen": -522.3057861328125,
"logps/rejected": -1139.15283203125,
"loss": 0.0586,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -10.60677433013916,
"rewards/margins": 20.42071533203125,
"rewards/rejected": -31.027490615844727,
"step": 100
},
{
"epoch": 0.9937888198757764,
"eval_logits/chosen": -1.0552905797958374,
"eval_logits/rejected": -1.0563225746154785,
"eval_logps/chosen": -495.5924072265625,
"eval_logps/rejected": -1189.4423828125,
"eval_loss": 0.0349690206348896,
"eval_rewards/accuracies": 0.9800000190734863,
"eval_rewards/chosen": -9.556852340698242,
"eval_rewards/margins": 23.761411666870117,
"eval_rewards/rejected": -33.318260192871094,
"eval_runtime": 16.2372,
"eval_samples_per_second": 3.079,
"eval_steps_per_second": 3.079,
"step": 100
}
],
"logging_steps": 10,
"max_steps": 101,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}