{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0026171159382360636, "eval_steps": 20, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.40162393450737, "learning_rate": 0.0, "log_odds_chosen": -0.10784949362277985, "log_odds_ratio": -0.819013774394989, "logits/chosen": -114.29496002197266, "logits/rejected": -113.46098327636719, "logps/chosen": -1.9617910385131836, "logps/rejected": -1.8746588230133057, "loss": 5.1729, "nll_loss": 5.090985298156738, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": -0.1961791068315506, "rewards/margins": -0.008713215589523315, "rewards/rejected": -0.1874658763408661, "step": 20 }, { "epoch": 0.0, "eval_log_odds_chosen": -0.010702775791287422, "eval_log_odds_ratio": -0.7446900606155396, "eval_logits/chosen": -111.06063079833984, "eval_logits/rejected": -111.48336791992188, "eval_logps/chosen": -2.004408597946167, "eval_logps/rejected": -1.9918007850646973, "eval_loss": 5.372676849365234, "eval_nll_loss": 5.298208236694336, "eval_rewards/accuracies": 0.46799999475479126, "eval_rewards/chosen": -0.20044086873531342, "eval_rewards/margins": -0.0012607867829501629, "eval_rewards/rejected": -0.19918008148670197, "eval_runtime": 372.018, "eval_samples_per_second": 5.376, "eval_steps_per_second": 2.688, "step": 20 } ], "logging_steps": 20, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }