FormlessAI's picture
Training in progress, epoch 0, checkpoint
edd685c verified
Invalid JSON: Unexpected token 'N', ..."/chosen": NaN, "... is not valid JSON
{
"best_global_step": null,
"best_metric": 0.005929804872721434,
"best_model_checkpoint": null,
"epoch": 0.018426386585590565,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0018426386585590566,
"grad_norm": 5.280612945556641,
"learning_rate": 1.3620564299313518e-05,
"logits/chosen": 5.0,
"logits/rejected": 3.8609375953674316,
"logps/chosen": -157.5,
"logps/rejected": -104.55000305175781,
"loss": 0.6152,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.10301513969898224,
"rewards/margins": 0.17802734673023224,
"rewards/rejected": -0.075439453125,
"step": 5
},
{
"epoch": 0.0036852773171181133,
"grad_norm": 1.3649892807006836,
"learning_rate": 3.064626967345541e-05,
"logits/chosen": 4.918749809265137,
"logits/rejected": 4.348437309265137,
"logps/chosen": -149.1999969482422,
"logps/rejected": -127.6500015258789,
"loss": 0.1506,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.57421875,
"rewards/margins": 2.2632813453674316,
"rewards/rejected": -1.6902344226837158,
"step": 10
},
{
"epoch": 0.00552791597567717,
"grad_norm": 0.4213128387928009,
"learning_rate": 4.7671975047597314e-05,
"logits/chosen": 4.181250095367432,
"logits/rejected": 4.234375,
"logps/chosen": -135.14999389648438,
"logps/rejected": -164.6999969482422,
"loss": 0.0211,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.9033721685409546,
"rewards/margins": 6.134375095367432,
"rewards/rejected": -5.2265625,
"step": 15
},
{
"epoch": 0.0073705546342362266,
"grad_norm": 0.008017129264771938,
"learning_rate": 6.469768042173921e-05,
"logits/chosen": NaN,
"logits/rejected": NaN,
"logps/chosen": -121.4000015258789,
"logps/rejected": -159.60000610351562,
"loss": 0.1003,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 1.757226586341858,
"rewards/margins": 8.699999809265137,
"rewards/rejected": -6.943749904632568,
"step": 20
},
{
"epoch": 0.009213193292795283,
"grad_norm": 0.142906054854393,
"learning_rate": 8.17233857958811e-05,
"logits/chosen": 3.2109375,
"logits/rejected": 3.495312452316284,
"logps/chosen": -130.25,
"logps/rejected": -203.89999389648438,
"loss": 0.012,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.8806641101837158,
"rewards/margins": 11.475000381469727,
"rewards/rejected": -9.600000381469727,
"step": 25
},
{
"epoch": 0.01105583195135434,
"grad_norm": 0.07906725257635117,
"learning_rate": 9.8749091170023e-05,
"logits/chosen": 3.089062452316284,
"logits/rejected": 3.160937547683716,
"logps/chosen": -140.9499969482422,
"logps/rejected": -238.89999389648438,
"loss": 0.0018,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.5691406726837158,
"rewards/margins": 13.162500381469727,
"rewards/rejected": -11.606249809265137,
"step": 30
},
{
"epoch": 0.012898470609913396,
"grad_norm": 0.0008193934918381274,
"learning_rate": 0.0001157747965441649,
"logits/chosen": 2.9078125953674316,
"logits/rejected": 2.964062452316284,
"logps/chosen": -145.6999969482422,
"logps/rejected": -262.3999938964844,
"loss": 0.0008,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.6046142578125,
"rewards/margins": 15.056249618530273,
"rewards/rejected": -14.456250190734863,
"step": 35
},
{
"epoch": 0.014741109268472453,
"grad_norm": 0.01741017960011959,
"learning_rate": 0.0001328005019183068,
"logits/chosen": 2.737499952316284,
"logits/rejected": 2.5140624046325684,
"logps/chosen": -156.14999389648438,
"logps/rejected": -299.29998779296875,
"loss": 0.0051,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.29374998807907104,
"rewards/margins": 17.787500381469727,
"rewards/rejected": -18.068750381469727,
"step": 40
},
{
"epoch": 0.01658374792703151,
"grad_norm": 0.00017174682579934597,
"learning_rate": 0.00014982620729244868,
"logits/chosen": 2.461718797683716,
"logits/rejected": NaN,
"logps/chosen": -162.10000610351562,
"logps/rejected": -340.1000061035156,
"loss": 0.0105,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -1.6267578601837158,
"rewards/margins": 21.0,
"rewards/rejected": -22.612499237060547,
"step": 45
},
{
"epoch": 0.018426386585590565,
"grad_norm": 0.08511215448379517,
"learning_rate": 0.00016685191266659058,
"logits/chosen": NaN,
"logits/rejected": 2.659374952316284,
"logps/chosen": -164.14999389648438,
"logps/rejected": -335.6000061035156,
"loss": 0.0115,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -1.616796851158142,
"rewards/margins": 20.087499618530273,
"rewards/rejected": -21.712499618530273,
"step": 50
},
{
"epoch": 0.018426386585590565,
"eval_logits/chosen": NaN,
"eval_logits/rejected": NaN,
"eval_logps/chosen": -173.89877319335938,
"eval_logps/rejected": -353.09814453125,
"eval_loss": 0.005929804872721434,
"eval_rewards/accuracies": 0.9923312664031982,
"eval_rewards/chosen": -2.196124315261841,
"eval_rewards/margins": 21.41180992126465,
"eval_rewards/rejected": -23.601993560791016,
"eval_runtime": 13.8851,
"eval_samples_per_second": 93.77,
"eval_steps_per_second": 11.739,
"step": 50
}
],
"logging_steps": 5,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0001
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}