LASA_Models / llama3_8b /trainer_state.json
yangjunxiao2021's picture
Upload 13 files
269bd98 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 225,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1342281879194631,
"grad_norm": 37.765533447265625,
"kl": 0.08906249701976776,
"learning_rate": 9e-08,
"logits/chosen": -11704729.6,
"logits/rejected": -24988057.6,
"logps/chosen": -343.025,
"logps/rejected": -369.6,
"loss": 0.4995,
"num_unsafe": 0.5,
"rewards/chosen": -0.000567626953125,
"rewards/margins": 0.0042724609375,
"rewards/rejected": -0.004840087890625,
"step": 10
},
{
"epoch": 0.2684563758389262,
"grad_norm": 45.17057418823242,
"kl": 0.17304687201976776,
"learning_rate": 1.8999999999999998e-07,
"logits/chosen": 4589977.6,
"logits/rejected": -1248870.4,
"logps/chosen": -337.7,
"logps/rejected": -336.45,
"loss": 0.504,
"num_unsafe": 0.699999988079071,
"rewards/chosen": -0.0170135498046875,
"rewards/margins": -0.03390350341796875,
"rewards/rejected": 0.01688995361328125,
"step": 20
},
{
"epoch": 0.40268456375838924,
"grad_norm": 34.08852767944336,
"kl": 0.09187011420726776,
"learning_rate": 2.9e-07,
"logits/chosen": 1790771.2,
"logits/rejected": -5015142.4,
"logps/chosen": -340.775,
"logps/rejected": -351.55,
"loss": 0.4883,
"num_unsafe": 0.6499999761581421,
"rewards/chosen": 0.020189189910888673,
"rewards/margins": 0.09420499801635743,
"rewards/rejected": -0.07401580810546875,
"step": 30
},
{
"epoch": 0.5369127516778524,
"grad_norm": 32.13209533691406,
"kl": 0.02812499925494194,
"learning_rate": 3.8999999999999997e-07,
"logits/chosen": -3015884.8,
"logits/rejected": -15467315.2,
"logps/chosen": -384.05,
"logps/rejected": -349.35,
"loss": 0.459,
"num_unsafe": 0.6000000238418579,
"rewards/chosen": 0.0526611328125,
"rewards/margins": 0.33935546875,
"rewards/rejected": -0.2866943359375,
"step": 40
},
{
"epoch": 0.6711409395973155,
"grad_norm": 27.443809509277344,
"kl": 0.140625,
"learning_rate": 4.9e-07,
"logits/chosen": 8273305.6,
"logits/rejected": -5085593.6,
"logps/chosen": -382.5,
"logps/rejected": -387.775,
"loss": 0.3974,
"num_unsafe": 0.25,
"rewards/chosen": 0.202978515625,
"rewards/margins": 0.955419921875,
"rewards/rejected": -0.75244140625,
"step": 50
},
{
"epoch": 0.8053691275167785,
"grad_norm": 27.648706436157227,
"kl": 0.015625,
"learning_rate": 5.9e-07,
"logits/chosen": 4796211.2,
"logits/rejected": -1143603.2,
"logps/chosen": -316.45,
"logps/rejected": -333.0,
"loss": 0.3345,
"num_unsafe": 0.4000000059604645,
"rewards/chosen": 0.3970947265625,
"rewards/margins": 1.7534423828125,
"rewards/rejected": -1.35634765625,
"step": 60
},
{
"epoch": 0.9395973154362416,
"grad_norm": 18.576852798461914,
"kl": 0.07734374701976776,
"learning_rate": 6.9e-07,
"logits/chosen": 330137.6,
"logits/rejected": -10493440.0,
"logps/chosen": -304.325,
"logps/rejected": -347.8,
"loss": 0.2515,
"num_unsafe": 0.6000000238418579,
"rewards/chosen": 0.79794921875,
"rewards/margins": 3.04169921875,
"rewards/rejected": -2.24375,
"step": 70
},
{
"epoch": 1.0671140939597314,
"grad_norm": 15.856501579284668,
"kl": 0.15131579339504242,
"learning_rate": 7.9e-07,
"logits/chosen": 3814022.736842105,
"logits/rejected": -5148456.421052632,
"logps/chosen": -327.3421052631579,
"logps/rejected": -383.7368421052632,
"loss": 0.2113,
"num_unsafe": 0.5789473652839661,
"rewards/chosen": 1.1128957648026316,
"rewards/margins": 3.958701685855263,
"rewards/rejected": -2.8458059210526314,
"step": 80
},
{
"epoch": 1.2013422818791946,
"grad_norm": 22.200435638427734,
"kl": 0.07187499850988388,
"learning_rate": 8.9e-07,
"logits/chosen": 5061427.2,
"logits/rejected": -9743462.4,
"logps/chosen": -325.575,
"logps/rejected": -370.55,
"loss": 0.161,
"num_unsafe": 0.699999988079071,
"rewards/chosen": 1.821484375,
"rewards/margins": 5.42890625,
"rewards/rejected": -3.607421875,
"step": 90
},
{
"epoch": 1.3355704697986577,
"grad_norm": 11.401748657226562,
"kl": 0.0,
"learning_rate": 9.9e-07,
"logits/chosen": 7218790.4,
"logits/rejected": 6960742.4,
"logps/chosen": -260.6125,
"logps/rejected": -385.0,
"loss": 0.1304,
"num_unsafe": 0.6000000238418579,
"rewards/chosen": 2.35836181640625,
"rewards/margins": 6.27554931640625,
"rewards/rejected": -3.9171875,
"step": 100
},
{
"epoch": 1.4697986577181208,
"grad_norm": 5.563471794128418,
"kl": 0.5796874761581421,
"learning_rate": 9.872634363932886e-07,
"logits/chosen": 4330291.2,
"logits/rejected": -9574809.6,
"logps/chosen": -400.125,
"logps/rejected": -395.825,
"loss": 0.1417,
"num_unsafe": 0.550000011920929,
"rewards/chosen": 2.4845703125,
"rewards/margins": 5.8267578125,
"rewards/rejected": -3.3421875,
"step": 110
},
{
"epoch": 1.604026845637584,
"grad_norm": 0.8229545950889587,
"kl": 1.169921875,
"learning_rate": 9.440682244067722e-07,
"logits/chosen": 14943027.2,
"logits/rejected": 602931.2,
"logps/chosen": -302.4875,
"logps/rejected": -388.95,
"loss": 0.0933,
"num_unsafe": 0.5,
"rewards/chosen": 3.31484375,
"rewards/margins": 7.7796875,
"rewards/rejected": -4.46484375,
"step": 120
},
{
"epoch": 1.738255033557047,
"grad_norm": 0.960098385810852,
"kl": 0.125,
"learning_rate": 8.729705727120911e-07,
"logits/chosen": 12668518.4,
"logits/rejected": 3657728.0,
"logps/chosen": -342.75,
"logps/rejected": -402.15,
"loss": 0.1193,
"num_unsafe": 0.25,
"rewards/chosen": 2.979296875,
"rewards/margins": 7.533984375,
"rewards/rejected": -4.5546875,
"step": 130
},
{
"epoch": 1.87248322147651,
"grad_norm": 8.306931495666504,
"kl": 0.71875,
"learning_rate": 7.78437808244094e-07,
"logits/chosen": 2376089.6,
"logits/rejected": -10040934.4,
"logps/chosen": -286.5,
"logps/rejected": -374.8,
"loss": 0.0991,
"num_unsafe": 0.550000011920929,
"rewards/chosen": 3.276904296875,
"rewards/margins": 8.284716796875,
"rewards/rejected": -5.0078125,
"step": 140
},
{
"epoch": 2.0,
"grad_norm": 1.439923882484436,
"kl": 0.08018092066049576,
"learning_rate": 6.664097722614933e-07,
"logits/chosen": 8874954.105263159,
"logits/rejected": 2802310.736842105,
"logps/chosen": -302.2631578947368,
"logps/rejected": -389.42105263157896,
"loss": 0.0798,
"num_unsafe": 0.6315789222717285,
"rewards/chosen": 3.531661184210526,
"rewards/margins": 8.710115131578947,
"rewards/rejected": -5.178453947368421,
"step": 150
},
{
"epoch": 2.134228187919463,
"grad_norm": 3.9872214794158936,
"kl": 0.16249999403953552,
"learning_rate": 5.439255982753717e-07,
"logits/chosen": -1116569.6,
"logits/rejected": -15123660.8,
"logps/chosen": -307.3375,
"logps/rejected": -428.3,
"loss": 0.0827,
"num_unsafe": 0.5,
"rewards/chosen": 3.5447265625,
"rewards/margins": 9.397851562500001,
"rewards/rejected": -5.853125,
"step": 160
},
{
"epoch": 2.2684563758389262,
"grad_norm": 1.704241156578064,
"kl": 0.24687500298023224,
"learning_rate": 4.1868141740255817e-07,
"logits/chosen": 15165849.6,
"logits/rejected": 8513945.6,
"logps/chosen": -305.3875,
"logps/rejected": -393.5,
"loss": 0.0919,
"num_unsafe": 0.699999988079071,
"rewards/chosen": 3.2162109375,
"rewards/margins": 8.9154296875,
"rewards/rejected": -5.69921875,
"step": 170
},
{
"epoch": 2.402684563758389,
"grad_norm": 1.0867478847503662,
"kl": 0.0,
"learning_rate": 2.985467821431687e-07,
"logits/chosen": 13270835.2,
"logits/rejected": 5517721.6,
"logps/chosen": -308.5625,
"logps/rejected": -415.5,
"loss": 0.0916,
"num_unsafe": 0.6499999761581421,
"rewards/chosen": 3.23125,
"rewards/margins": 9.74296875,
"rewards/rejected": -6.51171875,
"step": 180
},
{
"epoch": 2.5369127516778525,
"grad_norm": 0.9046293497085571,
"kl": 0.12187500298023224,
"learning_rate": 1.9107019345483288e-07,
"logits/chosen": 5835980.8,
"logits/rejected": -6291456.0,
"logps/chosen": -355.0,
"logps/rejected": -414.2,
"loss": 0.0807,
"num_unsafe": 0.6000000238418579,
"rewards/chosen": 2.9845703125,
"rewards/margins": 9.7416015625,
"rewards/rejected": -6.75703125,
"step": 190
},
{
"epoch": 2.6711409395973154,
"grad_norm": 0.7929665446281433,
"kl": 0.02500000037252903,
"learning_rate": 1.030048006760823e-07,
"logits/chosen": 15709798.4,
"logits/rejected": 2888089.6,
"logps/chosen": -350.425,
"logps/rejected": -444.375,
"loss": 0.1004,
"num_unsafe": 0.25,
"rewards/chosen": 3.40361328125,
"rewards/margins": 9.811425781250001,
"rewards/rejected": -6.4078125,
"step": 200
},
{
"epoch": 2.8053691275167782,
"grad_norm": 6.052427291870117,
"kl": 0.012500000186264515,
"learning_rate": 3.9884076317064807e-08,
"logits/chosen": 9807872.0,
"logits/rejected": 3389440.0,
"logps/chosen": -287.175,
"logps/rejected": -379.05,
"loss": 0.0973,
"num_unsafe": 0.4000000059604645,
"rewards/chosen": 3.32802734375,
"rewards/margins": 9.24248046875,
"rewards/rejected": -5.914453125,
"step": 210
},
{
"epoch": 2.9395973154362416,
"grad_norm": 0.8664066195487976,
"kl": 0.503125011920929,
"learning_rate": 5.674127631043024e-09,
"logits/chosen": 4027187.2,
"logits/rejected": -7430963.2,
"logps/chosen": -275.8,
"logps/rejected": -387.6,
"loss": 0.0668,
"num_unsafe": 0.6000000238418579,
"rewards/chosen": 3.6443359375,
"rewards/margins": 9.8779296875,
"rewards/rejected": -6.23359375,
"step": 220
}
],
"logging_steps": 10,
"max_steps": 225,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 300.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}