{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3410059676044331, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "completion_length": 140.53125, "epoch": 0.03410059676044331, "grad_norm": 0.267546683549881, "kl": 0.0008768478994170437, "learning_rate": 4.936026311617316e-07, "loss": 0.0, "reward": 1.040625, "reward_std": 0.13700193651020526, "rewards/correctness_reward_func_factual": 0.1140625, "rewards/format_reward_func_factual": 0.9265625, "step": 10 }, { "completion_length": 145.9953125, "epoch": 0.06820119352088662, "grad_norm": 0.050062697380781174, "kl": 0.0010662404758477351, "learning_rate": 4.630542059139923e-07, "loss": 0.0, "reward": 0.9890625, "reward_std": 0.12595339305698872, "rewards/correctness_reward_func_factual": 0.071875, "rewards/format_reward_func_factual": 0.9171875, "step": 20 }, { "completion_length": 146.1140625, "epoch": 0.10230179028132992, "grad_norm": 0.18261057138442993, "kl": 0.0010776295835967175, "learning_rate": 4.1035205490778496e-07, "loss": 0.0, "reward": 0.9875, "reward_std": 0.1414213538169861, "rewards/correctness_reward_func_factual": 0.0671875, "rewards/format_reward_func_factual": 0.9203125, "step": 30 }, { "completion_length": 141.715625, "epoch": 0.13640238704177324, "grad_norm": 0.25042295455932617, "kl": 0.0010743443999672309, "learning_rate": 3.409762342408719e-07, "loss": 0.0, "reward": 1.0390625, "reward_std": 0.1392116451635957, "rewards/correctness_reward_func_factual": 0.103125, "rewards/format_reward_func_factual": 0.9359375, "step": 40 }, { "completion_length": 147.79375, "epoch": 0.17050298380221654, "grad_norm": 0.1481354683637619, "kl": 0.0011524090303282719, "learning_rate": 2.621405555286121e-07, "loss": 0.0, "reward": 1.0125, "reward_std": 0.12816310189664365, "rewards/correctness_reward_func_factual": 0.078125, "rewards/format_reward_func_factual": 0.934375, "step": 50 }, { "completion_length": 144.015625, "epoch": 0.20460358056265984, "grad_norm": 0.06519610434770584, "kl": 0.001239983293635305, "learning_rate": 1.8204248194091425e-07, "loss": 0.0, "reward": 0.98125, "reward_std": 0.12816310189664365, "rewards/correctness_reward_func_factual": 0.05, "rewards/format_reward_func_factual": 0.93125, "step": 60 }, { "completion_length": 144.9734375, "epoch": 0.23870417732310314, "grad_norm": 0.055830299854278564, "kl": 0.0012718188016151543, "learning_rate": 1.0901074253727336e-07, "loss": 0.0, "reward": 0.9984375, "reward_std": 0.1524698968976736, "rewards/correctness_reward_func_factual": 0.0796875, "rewards/format_reward_func_factual": 0.91875, "step": 70 }, { "completion_length": 145.4859375, "epoch": 0.2728047740835465, "grad_norm": 0.4993789792060852, "kl": 0.0011785521975980374, "learning_rate": 5.0639297359319846e-08, "loss": 0.0, "reward": 1.003125, "reward_std": 0.13258251920342445, "rewards/correctness_reward_func_factual": 0.075, "rewards/format_reward_func_factual": 0.928125, "step": 80 }, { "completion_length": 145.9609375, "epoch": 0.3069053708439898, "grad_norm": 0.08759493380784988, "kl": 0.0012535244946775491, "learning_rate": 1.2997705010932391e-08, "loss": 0.0, "reward": 0.990625, "reward_std": 0.1414213538169861, "rewards/correctness_reward_func_factual": 0.078125, "rewards/format_reward_func_factual": 0.9125, "step": 90 }, { "completion_length": 146.890625, "epoch": 0.3410059676044331, "grad_norm": 0.5095328688621521, "kl": 0.0012054607454047074, "learning_rate": 0.0, "loss": 0.0, "reward": 1.021875, "reward_std": 0.11048543266952038, "rewards/correctness_reward_func_factual": 0.0765625, "rewards/format_reward_func_factual": 0.9453125, "step": 100 }, { "epoch": 0.3410059676044331, "step": 100, "total_flos": 0.0, "train_loss": 1.139334672188852e-06, "train_runtime": 11009.3263, "train_samples_per_second": 0.291, "train_steps_per_second": 0.009 } ], "logging_steps": 10, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }