PEFT
Safetensors
24_mistral / trainer_state.json
Hightower1992's picture
Upload folder using huggingface_hub
afa672d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 13,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07692307692307693,
"grad_norm": 36.832374572753906,
"learning_rate": 2e-05,
"logits/chosen": -1.1979809999465942,
"logits/rejected": -0.8325968980789185,
"logps/chosen": -14.64731216430664,
"logps/rejected": -1005.949951171875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.15384615384615385,
"grad_norm": 48.96150207519531,
"learning_rate": 4e-05,
"logits/chosen": -1.198547124862671,
"logits/rejected": -0.7667418122291565,
"logps/chosen": -13.524721145629883,
"logps/rejected": -1149.490966796875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 2
},
{
"epoch": 0.23076923076923078,
"grad_norm": 28.16060447692871,
"learning_rate": 6e-05,
"logits/chosen": -1.2035918235778809,
"logits/rejected": -0.7419127225875854,
"logps/chosen": -13.74232006072998,
"logps/rejected": -988.1054077148438,
"loss": 0.5315,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.02188706398010254,
"rewards/margins": 0.3549116849899292,
"rewards/rejected": -0.33302462100982666,
"step": 3
},
{
"epoch": 0.3076923076923077,
"grad_norm": 8.096433639526367,
"learning_rate": 8e-05,
"logits/chosen": -1.1596912145614624,
"logits/rejected": -0.7631052732467651,
"logps/chosen": -12.934490203857422,
"logps/rejected": -968.7999267578125,
"loss": 0.0984,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.15821895003318787,
"rewards/margins": 2.3479552268981934,
"rewards/rejected": -2.1897363662719727,
"step": 4
},
{
"epoch": 0.38461538461538464,
"grad_norm": 0.0061914557591080666,
"learning_rate": 0.0001,
"logits/chosen": -1.0816950798034668,
"logits/rejected": -0.6030597686767578,
"logps/chosen": -13.906386375427246,
"logps/rejected": -1000.5069580078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.49933597445487976,
"rewards/margins": 10.395833969116211,
"rewards/rejected": -9.89649772644043,
"step": 5
},
{
"epoch": 0.46153846153846156,
"grad_norm": 2.919980923721255e-10,
"learning_rate": 0.00012,
"logits/chosen": -0.988565981388092,
"logits/rejected": -0.6128067374229431,
"logps/chosen": -8.973637580871582,
"logps/rejected": -1372.248046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.7627038955688477,
"rewards/margins": 30.984756469726562,
"rewards/rejected": -30.22205352783203,
"step": 6
},
{
"epoch": 0.5384615384615384,
"grad_norm": 0.0,
"learning_rate": 0.00014,
"logits/chosen": -0.8455103039741516,
"logits/rejected": -0.5573095679283142,
"logps/chosen": -8.776264190673828,
"logps/rejected": -1911.2926025390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.5577902793884277,
"rewards/margins": 70.72723388671875,
"rewards/rejected": -70.16944885253906,
"step": 7
},
{
"epoch": 0.6153846153846154,
"grad_norm": 0.0,
"learning_rate": 0.00016,
"logits/chosen": -0.7601979970932007,
"logits/rejected": -0.5386461615562439,
"logps/chosen": -7.5249152183532715,
"logps/rejected": -2239.0810546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.6714935302734375,
"rewards/margins": 112.79618072509766,
"rewards/rejected": -112.12467956542969,
"step": 8
},
{
"epoch": 0.6923076923076923,
"grad_norm": 0.0,
"learning_rate": 0.00018,
"logits/chosen": -0.7337682247161865,
"logits/rejected": -0.6141457557678223,
"logps/chosen": -6.297776699066162,
"logps/rejected": -2562.424560546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.9677034616470337,
"rewards/margins": 154.79287719726562,
"rewards/rejected": -153.82516479492188,
"step": 9
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.0,
"learning_rate": 0.0002,
"logits/chosen": -0.7853107452392578,
"logits/rejected": -0.7350925803184509,
"logps/chosen": -8.834450721740723,
"logps/rejected": -3320.821533203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.912455677986145,
"rewards/margins": 216.040771484375,
"rewards/rejected": -215.12832641601562,
"step": 10
},
{
"epoch": 0.8461538461538461,
"grad_norm": 0.0,
"learning_rate": 0.00015000000000000001,
"logits/chosen": -0.8751146793365479,
"logits/rejected": -0.8513680696487427,
"logps/chosen": -7.073147773742676,
"logps/rejected": -3444.56396484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.7486312389373779,
"rewards/margins": 242.8885040283203,
"rewards/rejected": -242.13986206054688,
"step": 11
},
{
"epoch": 0.9230769230769231,
"grad_norm": 0.0,
"learning_rate": 5.000000000000002e-05,
"logits/chosen": -1.1207599639892578,
"logits/rejected": -1.0976228713989258,
"logps/chosen": -9.499576568603516,
"logps/rejected": -3954.97705078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.4698023200035095,
"rewards/margins": 283.22412109375,
"rewards/rejected": -282.75433349609375,
"step": 12
},
{
"epoch": 1.0,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits/chosen": -1.2583673000335693,
"logits/rejected": -1.2936705350875854,
"logps/chosen": -14.507587432861328,
"logps/rejected": -4967.0595703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.08211517333984375,
"rewards/margins": 360.6194763183594,
"rewards/rejected": -360.70159912109375,
"step": 13
}
],
"logging_steps": 1,
"max_steps": 13,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}