{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 10000.0,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006418613980543576,
"grad_norm": 32.048425683927725,
"learning_rate": 4.999493072462126e-07,
"logits/chosen": -2.115234375,
"logits/rejected": -1.5445556640625,
"logps/chosen": -132.65625,
"logps/rejected": -181.029296875,
"loss": 0.703399658203125,
"rewards/accuracies": 0.439453125,
"rewards/chosen": -0.015892624855041504,
"rewards/margins": -0.00758051872253418,
"rewards/rejected": -0.008310675621032715,
"step": 1
},
{
"epoch": 0.06418613980543576,
"grad_norm": 23.578875399152054,
"learning_rate": 4.949476630105669e-07,
"logits/chosen": -2.146253824234009,
"logits/rejected": -1.5375298261642456,
"logps/chosen": -127.29991149902344,
"logps/rejected": -182.31988525390625,
"loss": 0.6579203075832791,
"rewards/accuracies": 0.5796440839767456,
"rewards/chosen": 0.001607447862625122,
"rewards/margins": 0.09492193162441254,
"rewards/rejected": -0.09333191812038422,
"step": 10
},
{
"epoch": 0.12837227961087153,
"grad_norm": 16.096083288887513,
"learning_rate": 4.799948609147061e-07,
"logits/chosen": -2.1872315406799316,
"logits/rejected": -1.5583984851837158,
"logps/chosen": -127.17167663574219,
"logps/rejected": -188.09335327148438,
"loss": 0.5185166358947754,
"rewards/accuracies": 0.7955077886581421,
"rewards/chosen": 0.10318219661712646,
"rewards/margins": 0.5961636304855347,
"rewards/rejected": -0.49292677640914917,
"step": 20
},
{
"epoch": 0.1925584194163073,
"grad_norm": 16.84839050494715,
"learning_rate": 4.557459664734141e-07,
"logits/chosen": -2.217529296875,
"logits/rejected": -1.5862548351287842,
"logps/chosen": -125.22636413574219,
"logps/rejected": -189.91366577148438,
"loss": 0.41579198837280273,
"rewards/accuracies": 0.850781261920929,
"rewards/chosen": 0.16709718108177185,
"rewards/margins": 1.2345550060272217,
"rewards/rejected": -1.067326307296753,
"step": 30
},
{
"epoch": 0.25674455922174305,
"grad_norm": 8.87416271370084,
"learning_rate": 4.2318108837739986e-07,
"logits/chosen": -2.2668824195861816,
"logits/rejected": -1.598077416419983,
"logps/chosen": -127.8832015991211,
"logps/rejected": -192.91796875,
"loss": 0.35149335861206055,
"rewards/accuracies": 0.866406261920929,
"rewards/chosen": 0.12800344824790955,
"rewards/margins": 1.7798080444335938,
"rewards/rejected": -1.651770830154419,
"step": 40
},
{
"epoch": 0.3209306990271788,
"grad_norm": 8.909019678687008,
"learning_rate": 3.8361645653195024e-07,
"logits/chosen": -2.3679442405700684,
"logits/rejected": -1.644537329673767,
"logps/chosen": -127.83222961425781,
"logps/rejected": -205.20272827148438,
"loss": 0.30472755432128906,
"rewards/accuracies": 0.8851562738418579,
"rewards/chosen": 0.2481112778186798,
"rewards/margins": 2.5448379516601562,
"rewards/rejected": -2.296844482421875,
"step": 50
},
{
"epoch": 0.3851168388326146,
"grad_norm": 9.080316543515908,
"learning_rate": 3.3865122176063385e-07,
"logits/chosen": -2.381664991378784,
"logits/rejected": -1.673553466796875,
"logps/chosen": -126.41679382324219,
"logps/rejected": -204.1730499267578,
"loss": 0.3040948390960693,
"rewards/accuracies": 0.883007824420929,
"rewards/chosen": 0.16746802628040314,
"rewards/margins": 2.610337734222412,
"rewards/rejected": -2.443005323410034,
"step": 60
},
{
"epoch": 0.44930297863805035,
"grad_norm": 7.319834355840221,
"learning_rate": 2.9010282021444005e-07,
"logits/chosen": -2.3894896507263184,
"logits/rejected": -1.654962182044983,
"logps/chosen": -126.53593444824219,
"logps/rejected": -207.21328735351562,
"loss": 0.2680961608886719,
"rewards/accuracies": 0.8951171636581421,
"rewards/chosen": 0.16773858666419983,
"rewards/margins": 2.727093458175659,
"rewards/rejected": -2.5592041015625,
"step": 70
},
{
"epoch": 0.5134891184434861,
"grad_norm": 11.714525445790118,
"learning_rate": 2.399335149726463e-07,
"logits/chosen": -2.400561571121216,
"logits/rejected": -1.6822998523712158,
"logps/chosen": -126.2464828491211,
"logps/rejected": -208.8488311767578,
"loss": 0.27129082679748534,
"rewards/accuracies": 0.8958984613418579,
"rewards/chosen": 0.0786014050245285,
"rewards/margins": 2.6517059803009033,
"rewards/rejected": -2.5730834007263184,
"step": 80
},
{
"epoch": 0.5776752582489219,
"grad_norm": 13.05044359804742,
"learning_rate": 1.9017108392811062e-07,
"logits/chosen": -2.4099974632263184,
"logits/rejected": -1.6946532726287842,
"logps/chosen": -129.740234375,
"logps/rejected": -212.8679656982422,
"loss": 0.25001063346862795,
"rewards/accuracies": 0.9019531011581421,
"rewards/chosen": 0.07523002475500107,
"rewards/margins": 2.694448947906494,
"rewards/rejected": -2.618884325027466,
"step": 90
},
{
"epoch": 0.6418613980543576,
"grad_norm": 11.826565551460419,
"learning_rate": 1.428268596492364e-07,
"logits/chosen": -2.4135499000549316,
"logits/rejected": -1.6916077136993408,
"logps/chosen": -128.24374389648438,
"logps/rejected": -200.87850952148438,
"loss": 0.2475870132446289,
"rewards/accuracies": 0.9037109613418579,
"rewards/chosen": 0.13076062500476837,
"rewards/margins": 2.663525342941284,
"rewards/rejected": -2.5332884788513184,
"step": 100
},
{
"epoch": 0.7060475378597934,
"grad_norm": 19.350657646267706,
"learning_rate": 9.981443394050524e-08,
"logits/chosen": -2.416271924972534,
"logits/rejected": -1.668573021888733,
"logps/chosen": -124.5199203491211,
"logps/rejected": -204.02774047851562,
"loss": 0.24730167388916016,
"rewards/accuracies": 0.8970702886581421,
"rewards/chosen": 0.06585326045751572,
"rewards/margins": 2.7098052501678467,
"rewards/rejected": -2.6437134742736816,
"step": 110
},
{
"epoch": 0.7702336776652292,
"grad_norm": 14.914033483862706,
"learning_rate": 6.28723129572247e-08,
"logits/chosen": -2.42510986328125,
"logits/rejected": -1.6651611328125,
"logps/chosen": -129.416015625,
"logps/rejected": -211.32461547851562,
"loss": 0.2530521869659424,
"rewards/accuracies": 0.8990234136581421,
"rewards/chosen": 0.03038964234292507,
"rewards/margins": 2.786761522293091,
"rewards/rejected": -2.7561402320861816,
"step": 120
},
{
"epoch": 0.834419817470665,
"grad_norm": 12.452750762359729,
"learning_rate": 3.349364905389032e-08,
"logits/chosen": -2.4213013648986816,
"logits/rejected": -1.6964843273162842,
"logps/chosen": -126.28125,
"logps/rejected": -202.9324188232422,
"loss": 0.24086828231811525,
"rewards/accuracies": 0.904101550579071,
"rewards/chosen": 0.0428071990609169,
"rewards/margins": 2.784435987472534,
"rewards/rejected": -2.741345167160034,
"step": 130
},
{
"epoch": 0.8986059572761007,
"grad_norm": 7.337706041982024,
"learning_rate": 1.2865889513213628e-08,
"logits/chosen": -2.431103467941284,
"logits/rejected": -1.689788818359375,
"logps/chosen": -127.52030944824219,
"logps/rejected": -207.2761688232422,
"loss": 0.24082815647125244,
"rewards/accuracies": 0.9056640863418579,
"rewards/chosen": 0.055707789957523346,
"rewards/margins": 2.8203492164611816,
"rewards/rejected": -2.7646727561950684,
"step": 140
},
{
"epoch": 0.9627920970815365,
"grad_norm": 9.490755329142848,
"learning_rate": 1.8227814754865067e-09,
"logits/chosen": -2.4349732398986816,
"logits/rejected": -1.698211669921875,
"logps/chosen": -129.65625,
"logps/rejected": -206.28164672851562,
"loss": 0.24139628410339356,
"rewards/accuracies": 0.9046875238418579,
"rewards/chosen": 0.04055643081665039,
"rewards/margins": 2.805835008621216,
"rewards/rejected": -2.7651429176330566,
"step": 150
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2243593606337659e+19,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}