Invalid JSON: Unexpected token 'N', ..."/chosen": NaN,
"... is not valid JSON
| { | |
| "best_global_step": null, | |
| "best_metric": 0.005929804872721434, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.018426386585590565, | |
| "eval_steps": 50, | |
| "global_step": 50, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0018426386585590566, | |
| "grad_norm": 5.280612945556641, | |
| "learning_rate": 1.3620564299313518e-05, | |
| "logits/chosen": 5.0, | |
| "logits/rejected": 3.8609375953674316, | |
| "logps/chosen": -157.5, | |
| "logps/rejected": -104.55000305175781, | |
| "loss": 0.6152, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.10301513969898224, | |
| "rewards/margins": 0.17802734673023224, | |
| "rewards/rejected": -0.075439453125, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0036852773171181133, | |
| "grad_norm": 1.3649892807006836, | |
| "learning_rate": 3.064626967345541e-05, | |
| "logits/chosen": 4.918749809265137, | |
| "logits/rejected": 4.348437309265137, | |
| "logps/chosen": -149.1999969482422, | |
| "logps/rejected": -127.6500015258789, | |
| "loss": 0.1506, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.57421875, | |
| "rewards/margins": 2.2632813453674316, | |
| "rewards/rejected": -1.6902344226837158, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00552791597567717, | |
| "grad_norm": 0.4213128387928009, | |
| "learning_rate": 4.7671975047597314e-05, | |
| "logits/chosen": 4.181250095367432, | |
| "logits/rejected": 4.234375, | |
| "logps/chosen": -135.14999389648438, | |
| "logps/rejected": -164.6999969482422, | |
| "loss": 0.0211, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.9033721685409546, | |
| "rewards/margins": 6.134375095367432, | |
| "rewards/rejected": -5.2265625, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0073705546342362266, | |
| "grad_norm": 0.008017129264771938, | |
| "learning_rate": 6.469768042173921e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -121.4000015258789, | |
| "logps/rejected": -159.60000610351562, | |
| "loss": 0.1003, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 1.757226586341858, | |
| "rewards/margins": 8.699999809265137, | |
| "rewards/rejected": -6.943749904632568, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.009213193292795283, | |
| "grad_norm": 0.142906054854393, | |
| "learning_rate": 8.17233857958811e-05, | |
| "logits/chosen": 3.2109375, | |
| "logits/rejected": 3.495312452316284, | |
| "logps/chosen": -130.25, | |
| "logps/rejected": -203.89999389648438, | |
| "loss": 0.012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.8806641101837158, | |
| "rewards/margins": 11.475000381469727, | |
| "rewards/rejected": -9.600000381469727, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.01105583195135434, | |
| "grad_norm": 0.07906725257635117, | |
| "learning_rate": 9.8749091170023e-05, | |
| "logits/chosen": 3.089062452316284, | |
| "logits/rejected": 3.160937547683716, | |
| "logps/chosen": -140.9499969482422, | |
| "logps/rejected": -238.89999389648438, | |
| "loss": 0.0018, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.5691406726837158, | |
| "rewards/margins": 13.162500381469727, | |
| "rewards/rejected": -11.606249809265137, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.012898470609913396, | |
| "grad_norm": 0.0008193934918381274, | |
| "learning_rate": 0.0001157747965441649, | |
| "logits/chosen": 2.9078125953674316, | |
| "logits/rejected": 2.964062452316284, | |
| "logps/chosen": -145.6999969482422, | |
| "logps/rejected": -262.3999938964844, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.6046142578125, | |
| "rewards/margins": 15.056249618530273, | |
| "rewards/rejected": -14.456250190734863, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.014741109268472453, | |
| "grad_norm": 0.01741017960011959, | |
| "learning_rate": 0.0001328005019183068, | |
| "logits/chosen": 2.737499952316284, | |
| "logits/rejected": 2.5140624046325684, | |
| "logps/chosen": -156.14999389648438, | |
| "logps/rejected": -299.29998779296875, | |
| "loss": 0.0051, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.29374998807907104, | |
| "rewards/margins": 17.787500381469727, | |
| "rewards/rejected": -18.068750381469727, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01658374792703151, | |
| "grad_norm": 0.00017174682579934597, | |
| "learning_rate": 0.00014982620729244868, | |
| "logits/chosen": 2.461718797683716, | |
| "logits/rejected": null, | |
| "logps/chosen": -162.10000610351562, | |
| "logps/rejected": -340.1000061035156, | |
| "loss": 0.0105, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.6267578601837158, | |
| "rewards/margins": 21.0, | |
| "rewards/rejected": -22.612499237060547, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.018426386585590565, | |
| "grad_norm": 0.08511215448379517, | |
| "learning_rate": 0.00016685191266659058, | |
| "logits/chosen": null, | |
| "logits/rejected": 2.659374952316284, | |
| "logps/chosen": -164.14999389648438, | |
| "logps/rejected": -335.6000061035156, | |
| "loss": 0.0115, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.616796851158142, | |
| "rewards/margins": 20.087499618530273, | |
| "rewards/rejected": -21.712499618530273, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.018426386585590565, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": null, | |
| "eval_logps/chosen": -173.89877319335938, | |
| "eval_logps/rejected": -353.09814453125, | |
| "eval_loss": 0.005929804872721434, | |
| "eval_rewards/accuracies": 0.9923312664031982, | |
| "eval_rewards/chosen": -2.196124315261841, | |
| "eval_rewards/margins": 21.41180992126465, | |
| "eval_rewards/rejected": -23.601993560791016, | |
| "eval_runtime": 13.8851, | |
| "eval_samples_per_second": 93.77, | |
| "eval_steps_per_second": 11.739, | |
| "step": 50 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 10000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0001 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |