LASA_Models / qwen2.5_7b /trainer_state.json
yangjunxiao2021's picture
Upload 16 files
9f0e376 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 225,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1342281879194631,
"grad_norm": 36.80321502685547,
"kl": 0.20292969048023224,
"learning_rate": 9e-08,
"logits/chosen": -3776512.0,
"logits/rejected": 1030144.0,
"logps/chosen": -345.825,
"logps/rejected": -421.15,
"loss": 0.5017,
"num_unsafe": 0.5,
"rewards/chosen": -0.005364990234375,
"rewards/margins": -0.013800048828125,
"rewards/rejected": 0.00843505859375,
"step": 10
},
{
"epoch": 0.2684563758389262,
"grad_norm": 39.34665298461914,
"kl": 0.10800781100988388,
"learning_rate": 1.8999999999999998e-07,
"logits/chosen": 8857600.0,
"logits/rejected": 8883404.8,
"logps/chosen": -342.925,
"logps/rejected": -384.2,
"loss": 0.4963,
"num_unsafe": 0.699999988079071,
"rewards/chosen": 0.0006647109985351562,
"rewards/margins": 0.030388832092285156,
"rewards/rejected": -0.02972412109375,
"step": 20
},
{
"epoch": 0.40268456375838924,
"grad_norm": 30.005146026611328,
"kl": 0.03125,
"learning_rate": 2.9e-07,
"logits/chosen": 11644723.2,
"logits/rejected": 16304537.6,
"logps/chosen": -346.1,
"logps/rejected": -386.15,
"loss": 0.4873,
"num_unsafe": 0.6499999761581421,
"rewards/chosen": 0.016363525390625,
"rewards/margins": 0.10656433105468749,
"rewards/rejected": -0.0902008056640625,
"step": 30
},
{
"epoch": 0.5369127516778524,
"grad_norm": 26.460351943969727,
"kl": 0.0,
"learning_rate": 3.8999999999999997e-07,
"logits/chosen": 2756300.8,
"logits/rejected": 3816652.8,
"logps/chosen": -378.05,
"logps/rejected": -397.2,
"loss": 0.4541,
"num_unsafe": 0.6000000238418579,
"rewards/chosen": -0.070062255859375,
"rewards/margins": 0.396392822265625,
"rewards/rejected": -0.466455078125,
"step": 40
},
{
"epoch": 0.6711409395973155,
"grad_norm": 20.090633392333984,
"kl": 0.0,
"learning_rate": 4.9e-07,
"logits/chosen": 23447142.4,
"logits/rejected": 26068582.4,
"logps/chosen": -372.375,
"logps/rejected": -427.3,
"loss": 0.4115,
"num_unsafe": 0.25,
"rewards/chosen": -0.18045654296875,
"rewards/margins": 0.9100219726562501,
"rewards/rejected": -1.090478515625,
"step": 50
},
{
"epoch": 0.8053691275167785,
"grad_norm": 29.12238121032715,
"kl": 0.0,
"learning_rate": 5.9e-07,
"logits/chosen": 5931827.2,
"logits/rejected": 13681459.2,
"logps/chosen": -318.65,
"logps/rejected": -371.35,
"loss": 0.3601,
"num_unsafe": 0.4000000059604645,
"rewards/chosen": 0.175537109375,
"rewards/margins": 1.456591796875,
"rewards/rejected": -1.2810546875,
"step": 60
},
{
"epoch": 0.9395973154362416,
"grad_norm": 13.03518009185791,
"kl": 0.0,
"learning_rate": 6.9e-07,
"logits/chosen": 5972787.2,
"logits/rejected": 2752512.0,
"logps/chosen": -301.875,
"logps/rejected": -392.0,
"loss": 0.2818,
"num_unsafe": 0.6000000238418579,
"rewards/chosen": 0.56591796875,
"rewards/margins": 2.639208984375,
"rewards/rejected": -2.073291015625,
"step": 70
},
{
"epoch": 1.0671140939597314,
"grad_norm": 19.125137329101562,
"kl": 0.00657894741743803,
"learning_rate": 7.9e-07,
"logits/chosen": 4748773.052631579,
"logits/rejected": 10441135.157894736,
"logps/chosen": -334.7631578947368,
"logps/rejected": -413.6842105263158,
"loss": 0.2538,
"num_unsafe": 0.5789473652839661,
"rewards/chosen": 0.8779296875,
"rewards/margins": 3.0099198190789473,
"rewards/rejected": -2.1319901315789473,
"step": 80
},
{
"epoch": 1.2013422818791946,
"grad_norm": 21.394241333007812,
"kl": 0.18906250596046448,
"learning_rate": 8.9e-07,
"logits/chosen": 8029593.6,
"logits/rejected": 4953088.0,
"logps/chosen": -333.6625,
"logps/rejected": -412.95,
"loss": 0.209,
"num_unsafe": 0.699999988079071,
"rewards/chosen": 1.444970703125,
"rewards/margins": 3.844580078125,
"rewards/rejected": -2.399609375,
"step": 90
},
{
"epoch": 1.3355704697986577,
"grad_norm": 8.02198314666748,
"kl": 0.20468750596046448,
"learning_rate": 9.9e-07,
"logits/chosen": 8033792.0,
"logits/rejected": 13608140.8,
"logps/chosen": -271.025,
"logps/rejected": -416.6,
"loss": 0.1474,
"num_unsafe": 0.6000000238418579,
"rewards/chosen": 2.274462890625,
"rewards/margins": 5.532470703125,
"rewards/rejected": -3.2580078125,
"step": 100
},
{
"epoch": 1.4697986577181208,
"grad_norm": 8.773207664489746,
"kl": 0.3812499940395355,
"learning_rate": 9.872634363932886e-07,
"logits/chosen": 5904793.6,
"logits/rejected": 10031923.2,
"logps/chosen": -401.7,
"logps/rejected": -434.35,
"loss": 0.1696,
"num_unsafe": 0.550000011920929,
"rewards/chosen": 2.0365234375,
"rewards/margins": 5.1421875,
"rewards/rejected": -3.1056640625,
"step": 110
},
{
"epoch": 1.604026845637584,
"grad_norm": 2.1352667808532715,
"kl": 0.725781261920929,
"learning_rate": 9.440682244067722e-07,
"logits/chosen": 19757875.2,
"logits/rejected": 22788505.6,
"logps/chosen": -301.825,
"logps/rejected": -432.2,
"loss": 0.1174,
"num_unsafe": 0.5,
"rewards/chosen": 2.513671875,
"rewards/margins": 6.514453125,
"rewards/rejected": -4.00078125,
"step": 120
},
{
"epoch": 1.738255033557047,
"grad_norm": 3.652601480484009,
"kl": 0.5015624761581421,
"learning_rate": 8.729705727120911e-07,
"logits/chosen": 17581260.8,
"logits/rejected": 17930649.6,
"logps/chosen": -337.55,
"logps/rejected": -427.85,
"loss": 0.1357,
"num_unsafe": 0.25,
"rewards/chosen": 2.494921875,
"rewards/margins": 6.15390625,
"rewards/rejected": -3.658984375,
"step": 130
},
{
"epoch": 1.87248322147651,
"grad_norm": 2.743739604949951,
"kl": 1.01171875,
"learning_rate": 7.78437808244094e-07,
"logits/chosen": 2695168.0,
"logits/rejected": -2059059.2,
"logps/chosen": -287.05,
"logps/rejected": -410.7,
"loss": 0.1098,
"num_unsafe": 0.550000011920929,
"rewards/chosen": 2.70234375,
"rewards/margins": 6.565234374999999,
"rewards/rejected": -3.862890625,
"step": 140
},
{
"epoch": 2.0,
"grad_norm": 1.1540242433547974,
"kl": 0.6759868264198303,
"learning_rate": 6.664097722614933e-07,
"logits/chosen": 9126534.736842105,
"logits/rejected": 11134652.631578946,
"logps/chosen": -306.94736842105266,
"logps/rejected": -413.3157894736842,
"loss": 0.0889,
"num_unsafe": 0.6315789222717285,
"rewards/chosen": 3.2284128289473686,
"rewards/margins": 7.489103618421053,
"rewards/rejected": -4.260690789473684,
"step": 150
},
{
"epoch": 2.134228187919463,
"grad_norm": 3.1307098865509033,
"kl": 1.0421874523162842,
"learning_rate": 5.439255982753717e-07,
"logits/chosen": -1861222.4,
"logits/rejected": 2562252.8,
"logps/chosen": -312.7125,
"logps/rejected": -468.6,
"loss": 0.0885,
"num_unsafe": 0.5,
"rewards/chosen": 3.3125,
"rewards/margins": 8.0171875,
"rewards/rejected": -4.7046875,
"step": 160
},
{
"epoch": 2.2684563758389262,
"grad_norm": 1.1788336038589478,
"kl": 0.06875000149011612,
"learning_rate": 4.1868141740255817e-07,
"logits/chosen": 9242316.8,
"logits/rejected": 9269657.6,
"logps/chosen": -309.7,
"logps/rejected": -428.95,
"loss": 0.0865,
"num_unsafe": 0.699999988079071,
"rewards/chosen": 3.3330078125,
"rewards/margins": 7.8982421875,
"rewards/rejected": -4.565234375,
"step": 170
},
{
"epoch": 2.402684563758389,
"grad_norm": 1.8654648065567017,
"kl": 0.515625,
"learning_rate": 2.985467821431687e-07,
"logits/chosen": 11762073.6,
"logits/rejected": 16133324.8,
"logps/chosen": -313.075,
"logps/rejected": -435.2,
"loss": 0.0958,
"num_unsafe": 0.6499999761581421,
"rewards/chosen": 3.34765625,
"rewards/margins": 8.335546875,
"rewards/rejected": -4.987890625,
"step": 180
},
{
"epoch": 2.5369127516778525,
"grad_norm": 2.7622787952423096,
"kl": 0.6656249761581421,
"learning_rate": 1.9107019345483288e-07,
"logits/chosen": 3596492.8,
"logits/rejected": 4794982.4,
"logps/chosen": -345.0,
"logps/rejected": -441.7,
"loss": 0.0803,
"num_unsafe": 0.6000000238418579,
"rewards/chosen": 3.247265625,
"rewards/margins": 8.13984375,
"rewards/rejected": -4.892578125,
"step": 190
},
{
"epoch": 2.6711409395973154,
"grad_norm": 1.9276018142700195,
"kl": 0.22812500596046448,
"learning_rate": 1.030048006760823e-07,
"logits/chosen": 22071296.0,
"logits/rejected": 22795059.2,
"logps/chosen": -341.225,
"logps/rejected": -464.8,
"loss": 0.0984,
"num_unsafe": 0.25,
"rewards/chosen": 2.9384765625,
"rewards/margins": 7.8150390625,
"rewards/rejected": -4.8765625,
"step": 200
},
{
"epoch": 2.8053691275167782,
"grad_norm": 1.9921404123306274,
"kl": 0.30156248807907104,
"learning_rate": 3.9884076317064807e-08,
"logits/chosen": 4773068.8,
"logits/rejected": 10355916.8,
"logps/chosen": -291.175,
"logps/rejected": -402.8,
"loss": 0.1062,
"num_unsafe": 0.4000000059604645,
"rewards/chosen": 2.92861328125,
"rewards/margins": 7.34970703125,
"rewards/rejected": -4.42109375,
"step": 210
},
{
"epoch": 2.9395973154362416,
"grad_norm": 2.2024121284484863,
"kl": 0.7749999761581421,
"learning_rate": 5.674127631043024e-09,
"logits/chosen": 4405657.6,
"logits/rejected": -535833.6,
"logps/chosen": -272.775,
"logps/rejected": -421.8,
"loss": 0.0643,
"num_unsafe": 0.6000000238418579,
"rewards/chosen": 3.49609375,
"rewards/margins": 8.56484375,
"rewards/rejected": -5.06875,
"step": 220
}
],
"logging_steps": 10,
"max_steps": 225,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 300.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}