llama_binary_4096_5000_dpo_sft02 / trainer_state.json
unfair221's picture
Upload folder using huggingface_hub
c9c1d45 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 500,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.064,
"grad_norm": 47.54409408569336,
"learning_rate": 6.249999999999999e-07,
"logits/chosen": -1.7506301403045654,
"logits/rejected": -0.9330015778541565,
"logps/chosen": -326.6182861328125,
"logps/rejected": -795.388916015625,
"loss": 0.7092,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": 0.004627525806427002,
"rewards/margins": 0.044374678283929825,
"rewards/rejected": -0.039747148752212524,
"step": 10
},
{
"epoch": 0.128,
"grad_norm": 11.836691856384277,
"learning_rate": 9.979871469976195e-07,
"logits/chosen": -1.7661733627319336,
"logits/rejected": -0.9799120426177979,
"logps/chosen": -345.76220703125,
"logps/rejected": -836.4850463867188,
"loss": 0.3981,
"rewards/accuracies": 0.9468749761581421,
"rewards/chosen": 0.05132237449288368,
"rewards/margins": 1.2102007865905762,
"rewards/rejected": -1.1588784456253052,
"step": 20
},
{
"epoch": 0.192,
"grad_norm": 0.3670778274536133,
"learning_rate": 9.755282581475767e-07,
"logits/chosen": -1.934342861175537,
"logits/rejected": -1.3244361877441406,
"logps/chosen": -345.7969970703125,
"logps/rejected": -941.4720458984375,
"loss": 0.1481,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -3.1422524452209473,
"rewards/margins": 11.108638763427734,
"rewards/rejected": -14.250890731811523,
"step": 30
},
{
"epoch": 0.256,
"grad_norm": 0.37081119418144226,
"learning_rate": 9.29224396800933e-07,
"logits/chosen": -1.9896260499954224,
"logits/rejected": -1.5620503425598145,
"logps/chosen": -398.2611999511719,
"logps/rejected": -1067.803466796875,
"loss": 0.0628,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -8.123555183410645,
"rewards/margins": 22.457536697387695,
"rewards/rejected": -30.58109474182129,
"step": 40
},
{
"epoch": 0.32,
"grad_norm": 0.2008218616247177,
"learning_rate": 8.613974319136957e-07,
"logits/chosen": -1.8343955278396606,
"logits/rejected": -1.3990973234176636,
"logps/chosen": -393.0896301269531,
"logps/rejected": -1073.05419921875,
"loss": 0.0482,
"rewards/accuracies": 0.996874988079071,
"rewards/chosen": -5.675015449523926,
"rewards/margins": 21.024158477783203,
"rewards/rejected": -26.699174880981445,
"step": 50
},
{
"epoch": 0.384,
"grad_norm": 0.2783186733722687,
"learning_rate": 7.754484907260512e-07,
"logits/chosen": -1.7005574703216553,
"logits/rejected": -1.2351994514465332,
"logps/chosen": -363.7461853027344,
"logps/rejected": -956.2418212890625,
"loss": 0.0509,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.303299903869629,
"rewards/margins": 15.75185775756836,
"rewards/rejected": -19.055158615112305,
"step": 60
},
{
"epoch": 0.448,
"grad_norm": 0.19658127427101135,
"learning_rate": 6.756874120406714e-07,
"logits/chosen": -1.6647199392318726,
"logits/rejected": -1.1417269706726074,
"logps/chosen": -306.51519775390625,
"logps/rejected": -924.2443237304688,
"loss": 0.0512,
"rewards/accuracies": 0.984375,
"rewards/chosen": -1.8024471998214722,
"rewards/margins": 14.316963195800781,
"rewards/rejected": -16.119409561157227,
"step": 70
},
{
"epoch": 0.512,
"grad_norm": 0.15499065816402435,
"learning_rate": 5.671166329088277e-07,
"logits/chosen": -1.6030125617980957,
"logits/rejected": -1.0880917310714722,
"logps/chosen": -331.5353088378906,
"logps/rejected": -947.468017578125,
"loss": 0.0486,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -1.8866089582443237,
"rewards/margins": 14.326324462890625,
"rewards/rejected": -16.212932586669922,
"step": 80
},
{
"epoch": 0.576,
"grad_norm": 0.15497474372386932,
"learning_rate": 4.5518034554828327e-07,
"logits/chosen": -1.6156357526779175,
"logits/rejected": -1.0863291025161743,
"logps/chosen": -340.2659912109375,
"logps/rejected": -988.78564453125,
"loss": 0.0554,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -1.919586420059204,
"rewards/margins": 14.997438430786133,
"rewards/rejected": -16.917024612426758,
"step": 90
},
{
"epoch": 0.64,
"grad_norm": 0.16324351727962494,
"learning_rate": 3.454915028125263e-07,
"logits/chosen": -1.586004376411438,
"logits/rejected": -1.047084093093872,
"logps/chosen": -323.78350830078125,
"logps/rejected": -967.1044311523438,
"loss": 0.0436,
"rewards/accuracies": 0.996874988079071,
"rewards/chosen": -1.772637963294983,
"rewards/margins": 15.62191390991211,
"rewards/rejected": -17.39455223083496,
"step": 100
},
{
"epoch": 0.704,
"grad_norm": 0.31694385409355164,
"learning_rate": 2.4355036129704696e-07,
"logits/chosen": -1.5823489427566528,
"logits/rejected": -1.0539071559906006,
"logps/chosen": -335.7318420410156,
"logps/rejected": -982.3804321289062,
"loss": 0.0536,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -1.8680168390274048,
"rewards/margins": 15.72433853149414,
"rewards/rejected": -17.592355728149414,
"step": 110
},
{
"epoch": 0.768,
"grad_norm": 0.4332602918148041,
"learning_rate": 1.5446867550656767e-07,
"logits/chosen": -1.5331312417984009,
"logits/rejected": -1.0537359714508057,
"logps/chosen": -365.61004638671875,
"logps/rejected": -955.177978515625,
"loss": 0.0621,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -2.1784939765930176,
"rewards/margins": 14.849874496459961,
"rewards/rejected": -17.02836799621582,
"step": 120
},
{
"epoch": 0.832,
"grad_norm": 0.3648674786090851,
"learning_rate": 8.271337313934867e-08,
"logits/chosen": -1.572448968887329,
"logits/rejected": -1.061173677444458,
"logps/chosen": -352.5264892578125,
"logps/rejected": -974.0443115234375,
"loss": 0.0502,
"rewards/accuracies": 0.9906249642372131,
"rewards/chosen": -1.944822072982788,
"rewards/margins": 15.244119644165039,
"rewards/rejected": -17.188941955566406,
"step": 130
},
{
"epoch": 0.896,
"grad_norm": 0.1386958509683609,
"learning_rate": 3.188256468013139e-08,
"logits/chosen": -1.604827642440796,
"logits/rejected": -1.0495269298553467,
"logps/chosen": -310.2612609863281,
"logps/rejected": -985.6607055664062,
"loss": 0.0457,
"rewards/accuracies": 0.996874988079071,
"rewards/chosen": -1.420556902885437,
"rewards/margins": 16.280969619750977,
"rewards/rejected": -17.701526641845703,
"step": 140
},
{
"epoch": 0.96,
"grad_norm": 0.13034382462501526,
"learning_rate": 4.5251191160326495e-09,
"logits/chosen": -1.5691853761672974,
"logits/rejected": -1.0549018383026123,
"logps/chosen": -345.1254577636719,
"logps/rejected": -989.0575561523438,
"loss": 0.0507,
"rewards/accuracies": 0.984375,
"rewards/chosen": -1.7863011360168457,
"rewards/margins": 15.717473983764648,
"rewards/rejected": -17.503774642944336,
"step": 150
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1530176818095063e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}