{ "best_global_step": null, "best_metric": 0.005929804872721434, "best_model_checkpoint": null, "epoch": 0.018426386585590565, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0018426386585590566, "grad_norm": 5.280612945556641, "learning_rate": 1.3620564299313518e-05, "logits/chosen": 5.0, "logits/rejected": 3.8609375953674316, "logps/chosen": -157.5, "logps/rejected": -104.55000305175781, "loss": 0.6152, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.10301513969898224, "rewards/margins": 0.17802734673023224, "rewards/rejected": -0.075439453125, "step": 5 }, { "epoch": 0.0036852773171181133, "grad_norm": 1.3649892807006836, "learning_rate": 3.064626967345541e-05, "logits/chosen": 4.918749809265137, "logits/rejected": 4.348437309265137, "logps/chosen": -149.1999969482422, "logps/rejected": -127.6500015258789, "loss": 0.1506, "rewards/accuracies": 1.0, "rewards/chosen": 0.57421875, "rewards/margins": 2.2632813453674316, "rewards/rejected": -1.6902344226837158, "step": 10 }, { "epoch": 0.00552791597567717, "grad_norm": 0.4213128387928009, "learning_rate": 4.7671975047597314e-05, "logits/chosen": 4.181250095367432, "logits/rejected": 4.234375, "logps/chosen": -135.14999389648438, "logps/rejected": -164.6999969482422, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": 0.9033721685409546, "rewards/margins": 6.134375095367432, "rewards/rejected": -5.2265625, "step": 15 }, { "epoch": 0.0073705546342362266, "grad_norm": 0.008017129264771938, "learning_rate": 6.469768042173921e-05, "logits/chosen": NaN, "logits/rejected": NaN, "logps/chosen": -121.4000015258789, "logps/rejected": -159.60000610351562, "loss": 0.1003, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 1.757226586341858, "rewards/margins": 8.699999809265137, "rewards/rejected": -6.943749904632568, "step": 20 }, { "epoch": 0.009213193292795283, "grad_norm": 0.142906054854393, "learning_rate": 8.17233857958811e-05, "logits/chosen": 3.2109375, "logits/rejected": 3.495312452316284, "logps/chosen": -130.25, "logps/rejected": -203.89999389648438, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 1.8806641101837158, "rewards/margins": 11.475000381469727, "rewards/rejected": -9.600000381469727, "step": 25 }, { "epoch": 0.01105583195135434, "grad_norm": 0.07906725257635117, "learning_rate": 9.8749091170023e-05, "logits/chosen": 3.089062452316284, "logits/rejected": 3.160937547683716, "logps/chosen": -140.9499969482422, "logps/rejected": -238.89999389648438, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 1.5691406726837158, "rewards/margins": 13.162500381469727, "rewards/rejected": -11.606249809265137, "step": 30 }, { "epoch": 0.012898470609913396, "grad_norm": 0.0008193934918381274, "learning_rate": 0.0001157747965441649, "logits/chosen": 2.9078125953674316, "logits/rejected": 2.964062452316284, "logps/chosen": -145.6999969482422, "logps/rejected": -262.3999938964844, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.6046142578125, "rewards/margins": 15.056249618530273, "rewards/rejected": -14.456250190734863, "step": 35 }, { "epoch": 0.014741109268472453, "grad_norm": 0.01741017960011959, "learning_rate": 0.0001328005019183068, "logits/chosen": 2.737499952316284, "logits/rejected": 2.5140624046325684, "logps/chosen": -156.14999389648438, "logps/rejected": -299.29998779296875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.29374998807907104, "rewards/margins": 17.787500381469727, "rewards/rejected": -18.068750381469727, "step": 40 }, { "epoch": 0.01658374792703151, "grad_norm": 0.00017174682579934597, "learning_rate": 0.00014982620729244868, "logits/chosen": 2.461718797683716, "logits/rejected": NaN, "logps/chosen": -162.10000610351562, "logps/rejected": -340.1000061035156, "loss": 0.0105, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6267578601837158, "rewards/margins": 21.0, "rewards/rejected": -22.612499237060547, "step": 45 }, { "epoch": 0.018426386585590565, "grad_norm": 0.08511215448379517, "learning_rate": 0.00016685191266659058, "logits/chosen": NaN, "logits/rejected": 2.659374952316284, "logps/chosen": -164.14999389648438, "logps/rejected": -335.6000061035156, "loss": 0.0115, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.616796851158142, "rewards/margins": 20.087499618530273, "rewards/rejected": -21.712499618530273, "step": 50 }, { "epoch": 0.018426386585590565, "eval_logits/chosen": NaN, "eval_logits/rejected": NaN, "eval_logps/chosen": -173.89877319335938, "eval_logps/rejected": -353.09814453125, "eval_loss": 0.005929804872721434, "eval_rewards/accuracies": 0.9923312664031982, "eval_rewards/chosen": -2.196124315261841, "eval_rewards/margins": 21.41180992126465, "eval_rewards/rejected": -23.601993560791016, "eval_runtime": 13.8851, "eval_samples_per_second": 93.77, "eval_steps_per_second": 11.739, "step": 50 } ], "logging_steps": 5, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }