DPO_CPPO / dataset-0 /checkpoint-600 /trainer_state.json
Shahradmz's picture
Upload folder using huggingface_hub
2901fae verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.255639097744361,
"eval_steps": 200,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07518796992481203,
"grad_norm": 0.00031717625653774825,
"learning_rate": 4.906015037593986e-06,
"logits/chosen": -3.08984375,
"logits/rejected": -3.359375,
"logps/chosen": -759.5999755859375,
"logps/rejected": -601.7999877929688,
"loss": 0.0505,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 3.9212889671325684,
"rewards/margins": 19.23046875,
"rewards/rejected": -15.315332412719727,
"step": 20
},
{
"epoch": 0.15037593984962405,
"grad_norm": 2.5674383538843824e-13,
"learning_rate": 4.81203007518797e-06,
"logits/chosen": -3.37890625,
"logits/rejected": -3.901562452316284,
"logps/chosen": -797.4000244140625,
"logps/rejected": -834.2000122070312,
"loss": 0.0018,
"rewards/accuracies": 0.9984375238418579,
"rewards/chosen": 0.5521484613418579,
"rewards/margins": 38.98125076293945,
"rewards/rejected": -38.412498474121094,
"step": 40
},
{
"epoch": 0.22556390977443608,
"grad_norm": 4.184555188916154e-12,
"learning_rate": 4.718045112781955e-06,
"logits/chosen": -3.237499952316284,
"logits/rejected": -4.165625095367432,
"logps/chosen": -774.5999755859375,
"logps/rejected": -944.7999877929688,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.255664110183716,
"rewards/margins": 53.625,
"rewards/rejected": -50.36249923706055,
"step": 60
},
{
"epoch": 0.3007518796992481,
"grad_norm": 3.674299650107461e-07,
"learning_rate": 4.62406015037594e-06,
"logits/chosen": -3.0601563453674316,
"logits/rejected": -4.301562309265137,
"logps/chosen": -744.2000122070312,
"logps/rejected": -968.0,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.635937690734863,
"rewards/margins": 58.375,
"rewards/rejected": -51.724998474121094,
"step": 80
},
{
"epoch": 0.37593984962406013,
"grad_norm": 2.9469468290533765e-13,
"learning_rate": 4.530075187969925e-06,
"logits/chosen": -3.035937547683716,
"logits/rejected": -4.3359375,
"logps/chosen": -754.0,
"logps/rejected": -981.0,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.212500095367432,
"rewards/margins": 58.912498474121094,
"rewards/rejected": -52.724998474121094,
"step": 100
},
{
"epoch": 0.45112781954887216,
"grad_norm": 2.450421719466682e-10,
"learning_rate": 4.43609022556391e-06,
"logits/chosen": -3.0367188453674316,
"logits/rejected": -4.3203125,
"logps/chosen": -740.0,
"logps/rejected": -961.7999877929688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.285937309265137,
"rewards/margins": 57.474998474121094,
"rewards/rejected": -51.1875,
"step": 120
},
{
"epoch": 0.5263157894736842,
"grad_norm": 1.6351303999698253e-10,
"learning_rate": 4.342105263157895e-06,
"logits/chosen": -3.0234375,
"logits/rejected": -4.3046875,
"logps/chosen": -754.0,
"logps/rejected": -969.4000244140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.207812309265137,
"rewards/margins": 57.662498474121094,
"rewards/rejected": -51.412498474121094,
"step": 140
},
{
"epoch": 0.6015037593984962,
"grad_norm": 1.5915083665281199e-09,
"learning_rate": 4.24812030075188e-06,
"logits/chosen": -3.03125,
"logits/rejected": -4.318749904632568,
"logps/chosen": -748.2000122070312,
"logps/rejected": -958.7999877929688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.248437404632568,
"rewards/margins": 57.525001525878906,
"rewards/rejected": -51.25,
"step": 160
},
{
"epoch": 0.6766917293233082,
"grad_norm": 4.297014302694241e-12,
"learning_rate": 4.1541353383458646e-06,
"logits/chosen": -3.0406250953674316,
"logits/rejected": -4.279687404632568,
"logps/chosen": -741.4000244140625,
"logps/rejected": -961.4000244140625,
"loss": 0.0132,
"rewards/accuracies": 0.9906250238418579,
"rewards/chosen": 6.785937309265137,
"rewards/margins": 57.4375,
"rewards/rejected": -50.625,
"step": 180
},
{
"epoch": 0.7518796992481203,
"grad_norm": 9.095973454791659e-12,
"learning_rate": 4.06015037593985e-06,
"logits/chosen": -3.055468797683716,
"logits/rejected": -4.318749904632568,
"logps/chosen": -733.0,
"logps/rejected": -968.0,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.356249809265137,
"rewards/margins": 59.5625,
"rewards/rejected": -52.224998474121094,
"step": 200
},
{
"epoch": 0.7518796992481203,
"eval_logits/chosen": -3.058178186416626,
"eval_logits/rejected": -4.299867153167725,
"eval_logps/chosen": -741.7021484375,
"eval_logps/rejected": -972.0850830078125,
"eval_loss": 2.180002622864663e-09,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 7.262632846832275,
"eval_rewards/margins": 58.87765884399414,
"eval_rewards/rejected": -51.62765884399414,
"eval_runtime": 8.4937,
"eval_samples_per_second": 176.601,
"eval_score": -0.6606304049491882,
"eval_steps_per_second": 5.533,
"step": 200
},
{
"epoch": 0.8270676691729323,
"grad_norm": 2.3694597465927517e-14,
"learning_rate": 3.966165413533835e-06,
"logits/chosen": -3.057812452316284,
"logits/rejected": -4.2890625,
"logps/chosen": -739.0,
"logps/rejected": -973.4000244140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.339062690734863,
"rewards/margins": 58.67499923706055,
"rewards/rejected": -51.32500076293945,
"step": 220
},
{
"epoch": 0.9022556390977443,
"grad_norm": 4.7239515256796625e-09,
"learning_rate": 3.87218045112782e-06,
"logits/chosen": -3.0648436546325684,
"logits/rejected": -4.301562309265137,
"logps/chosen": -739.4000244140625,
"logps/rejected": -980.4000244140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.296875,
"rewards/margins": 59.57500076293945,
"rewards/rejected": -52.25,
"step": 240
},
{
"epoch": 0.9774436090225563,
"grad_norm": 1.1010824468107971e-09,
"learning_rate": 3.778195488721805e-06,
"logits/chosen": -3.059375047683716,
"logits/rejected": -4.317187309265137,
"logps/chosen": -730.0,
"logps/rejected": -975.0,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.485937595367432,
"rewards/margins": 60.162498474121094,
"rewards/rejected": -52.67499923706055,
"step": 260
},
{
"epoch": 1.0526315789473684,
"grad_norm": 5.510844973332184e-09,
"learning_rate": 3.6842105263157896e-06,
"logits/chosen": -3.06640625,
"logits/rejected": -4.3046875,
"logps/chosen": -736.4000244140625,
"logps/rejected": -961.5999755859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.443749904632568,
"rewards/margins": 58.42499923706055,
"rewards/rejected": -51.025001525878906,
"step": 280
},
{
"epoch": 1.1278195488721805,
"grad_norm": 1.0805968862929627e-08,
"learning_rate": 3.590225563909775e-06,
"logits/chosen": -3.067187547683716,
"logits/rejected": -4.329687595367432,
"logps/chosen": -737.0,
"logps/rejected": -978.2000122070312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.373437404632568,
"rewards/margins": 59.849998474121094,
"rewards/rejected": -52.474998474121094,
"step": 300
},
{
"epoch": 1.2030075187969924,
"grad_norm": 1.8280615029499892e-13,
"learning_rate": 3.4962406015037596e-06,
"logits/chosen": -3.063281297683716,
"logits/rejected": -4.317187309265137,
"logps/chosen": -739.2000122070312,
"logps/rejected": -977.2000122070312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.349999904632568,
"rewards/margins": 59.67499923706055,
"rewards/rejected": -52.337501525878906,
"step": 320
},
{
"epoch": 1.2781954887218046,
"grad_norm": 8.685091807936255e-12,
"learning_rate": 3.4022556390977448e-06,
"logits/chosen": -3.057812452316284,
"logits/rejected": -4.293749809265137,
"logps/chosen": -740.4000244140625,
"logps/rejected": -961.2000122070312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.3203125,
"rewards/margins": 58.587501525878906,
"rewards/rejected": -51.25,
"step": 340
},
{
"epoch": 1.3533834586466165,
"grad_norm": 1.6244164681709312e-07,
"learning_rate": 3.3082706766917295e-06,
"logits/chosen": -3.059375047683716,
"logits/rejected": -4.295312404632568,
"logps/chosen": -738.2000122070312,
"logps/rejected": -958.0,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.278124809265137,
"rewards/margins": 58.337501525878906,
"rewards/rejected": -51.04999923706055,
"step": 360
},
{
"epoch": 1.4285714285714286,
"grad_norm": 3.8272192733826815e-13,
"learning_rate": 3.2142857142857147e-06,
"logits/chosen": -3.0648436546325684,
"logits/rejected": -4.318749904632568,
"logps/chosen": -731.0,
"logps/rejected": -984.4000244140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.357812404632568,
"rewards/margins": 60.38750076293945,
"rewards/rejected": -53.04999923706055,
"step": 380
},
{
"epoch": 1.5037593984962405,
"grad_norm": 6.450130800887369e-09,
"learning_rate": 3.1203007518796995e-06,
"logits/chosen": -3.0570311546325684,
"logits/rejected": -4.317187309265137,
"logps/chosen": -735.5999755859375,
"logps/rejected": -965.5999755859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.471875190734863,
"rewards/margins": 59.13750076293945,
"rewards/rejected": -51.650001525878906,
"step": 400
},
{
"epoch": 1.5037593984962405,
"eval_logits/chosen": -3.0611701011657715,
"eval_logits/rejected": -4.303191661834717,
"eval_logps/chosen": -741.872314453125,
"eval_logps/rejected": -972.85107421875,
"eval_loss": 1.5668466524232372e-09,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 7.266622543334961,
"eval_rewards/margins": 58.98404312133789,
"eval_rewards/rejected": -51.70744705200195,
"eval_runtime": 8.4851,
"eval_samples_per_second": 176.78,
"eval_score": -0.6956531405448914,
"eval_steps_per_second": 5.539,
"step": 400
},
{
"epoch": 1.5789473684210527,
"grad_norm": 1.5739247094468196e-10,
"learning_rate": 3.0263157894736843e-06,
"logits/chosen": -3.063281297683716,
"logits/rejected": -4.301562309265137,
"logps/chosen": -732.5999755859375,
"logps/rejected": -961.0,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.067187309265137,
"rewards/margins": 58.025001525878906,
"rewards/rejected": -50.95000076293945,
"step": 420
},
{
"epoch": 1.6541353383458648,
"grad_norm": 5.037244548596992e-09,
"learning_rate": 2.9323308270676694e-06,
"logits/chosen": -3.0679688453674316,
"logits/rejected": -4.328125,
"logps/chosen": -735.5999755859375,
"logps/rejected": -983.5999755859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.542187690734863,
"rewards/margins": 60.11249923706055,
"rewards/rejected": -52.54999923706055,
"step": 440
},
{
"epoch": 1.7293233082706767,
"grad_norm": 1.907719763871417e-13,
"learning_rate": 2.8383458646616546e-06,
"logits/chosen": -3.065624952316284,
"logits/rejected": -4.318749904632568,
"logps/chosen": -732.0,
"logps/rejected": -971.4000244140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.348437309265137,
"rewards/margins": 59.599998474121094,
"rewards/rejected": -52.275001525878906,
"step": 460
},
{
"epoch": 1.8045112781954886,
"grad_norm": 2.413237608760496e-12,
"learning_rate": 2.7443609022556394e-06,
"logits/chosen": -3.06640625,
"logits/rejected": -4.303124904632568,
"logps/chosen": -734.0,
"logps/rejected": -962.2000122070312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.337500095367432,
"rewards/margins": 58.900001525878906,
"rewards/rejected": -51.54999923706055,
"step": 480
},
{
"epoch": 1.8796992481203008,
"grad_norm": 3.8499016698807266e-05,
"learning_rate": 2.650375939849624e-06,
"logits/chosen": -3.063281297683716,
"logits/rejected": -4.34375,
"logps/chosen": -741.0,
"logps/rejected": -975.4000244140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.368750095367432,
"rewards/margins": 60.42499923706055,
"rewards/rejected": -53.0625,
"step": 500
},
{
"epoch": 1.954887218045113,
"grad_norm": 1.6477178899318827e-11,
"learning_rate": 2.556390977443609e-06,
"logits/chosen": -3.051562547683716,
"logits/rejected": -4.314062595367432,
"logps/chosen": -734.2000122070312,
"logps/rejected": -970.4000244140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.464062690734863,
"rewards/margins": 59.70000076293945,
"rewards/rejected": -52.224998474121094,
"step": 520
},
{
"epoch": 2.030075187969925,
"grad_norm": 2.0829162250893226e-08,
"learning_rate": 2.462406015037594e-06,
"logits/chosen": -3.022656202316284,
"logits/rejected": -4.27734375,
"logps/chosen": -713.4000244140625,
"logps/rejected": -979.4000244140625,
"loss": 0.0045,
"rewards/accuracies": 0.995312511920929,
"rewards/chosen": 8.104687690734863,
"rewards/margins": 60.17499923706055,
"rewards/rejected": -52.0625,
"step": 540
},
{
"epoch": 2.1052631578947367,
"grad_norm": 1.0373247696775237e-09,
"learning_rate": 2.368421052631579e-06,
"logits/chosen": -2.9625000953674316,
"logits/rejected": -4.248437404632568,
"logps/chosen": -729.2000122070312,
"logps/rejected": -967.4000244140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.295312881469727,
"rewards/margins": 59.724998474121094,
"rewards/rejected": -51.42499923706055,
"step": 560
},
{
"epoch": 2.180451127819549,
"grad_norm": 1.0771024319146517e-10,
"learning_rate": 2.274436090225564e-06,
"logits/chosen": -2.964062452316284,
"logits/rejected": -4.268750190734863,
"logps/chosen": -727.2000122070312,
"logps/rejected": -964.0,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.871874809265137,
"rewards/margins": 59.3125,
"rewards/rejected": -51.42499923706055,
"step": 580
},
{
"epoch": 2.255639097744361,
"grad_norm": 3.252682975051824e-07,
"learning_rate": 2.180451127819549e-06,
"logits/chosen": -2.9429688453674316,
"logits/rejected": -4.2734375,
"logps/chosen": -725.0,
"logps/rejected": -963.5999755859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 7.995312690734863,
"rewards/margins": 59.537498474121094,
"rewards/rejected": -51.537498474121094,
"step": 600
},
{
"epoch": 2.255639097744361,
"eval_logits/chosen": -2.953125,
"eval_logits/rejected": -4.254654407501221,
"eval_logps/chosen": -734.9786987304688,
"eval_logps/rejected": -968.7659301757812,
"eval_loss": 3.946915239083637e-09,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 7.918882846832275,
"eval_rewards/margins": 59.25,
"eval_rewards/rejected": -51.32978820800781,
"eval_runtime": 8.4849,
"eval_samples_per_second": 176.784,
"eval_score": -0.8367462158203125,
"eval_steps_per_second": 5.539,
"step": 600
}
],
"logging_steps": 20,
"max_steps": 1064,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}