| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.3410059676044331, | |
| "eval_steps": 500, | |
| "global_step": 100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 140.53125, | |
| "epoch": 0.03410059676044331, | |
| "grad_norm": 0.267546683549881, | |
| "kl": 0.0008768478994170437, | |
| "learning_rate": 4.936026311617316e-07, | |
| "loss": 0.0, | |
| "reward": 1.040625, | |
| "reward_std": 0.13700193651020526, | |
| "rewards/correctness_reward_func_factual": 0.1140625, | |
| "rewards/format_reward_func_factual": 0.9265625, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 145.9953125, | |
| "epoch": 0.06820119352088662, | |
| "grad_norm": 0.050062697380781174, | |
| "kl": 0.0010662404758477351, | |
| "learning_rate": 4.630542059139923e-07, | |
| "loss": 0.0, | |
| "reward": 0.9890625, | |
| "reward_std": 0.12595339305698872, | |
| "rewards/correctness_reward_func_factual": 0.071875, | |
| "rewards/format_reward_func_factual": 0.9171875, | |
| "step": 20 | |
| }, | |
| { | |
| "completion_length": 146.1140625, | |
| "epoch": 0.10230179028132992, | |
| "grad_norm": 0.18261057138442993, | |
| "kl": 0.0010776295835967175, | |
| "learning_rate": 4.1035205490778496e-07, | |
| "loss": 0.0, | |
| "reward": 0.9875, | |
| "reward_std": 0.1414213538169861, | |
| "rewards/correctness_reward_func_factual": 0.0671875, | |
| "rewards/format_reward_func_factual": 0.9203125, | |
| "step": 30 | |
| }, | |
| { | |
| "completion_length": 141.715625, | |
| "epoch": 0.13640238704177324, | |
| "grad_norm": 0.25042295455932617, | |
| "kl": 0.0010743443999672309, | |
| "learning_rate": 3.409762342408719e-07, | |
| "loss": 0.0, | |
| "reward": 1.0390625, | |
| "reward_std": 0.1392116451635957, | |
| "rewards/correctness_reward_func_factual": 0.103125, | |
| "rewards/format_reward_func_factual": 0.9359375, | |
| "step": 40 | |
| }, | |
| { | |
| "completion_length": 147.79375, | |
| "epoch": 0.17050298380221654, | |
| "grad_norm": 0.1481354683637619, | |
| "kl": 0.0011524090303282719, | |
| "learning_rate": 2.621405555286121e-07, | |
| "loss": 0.0, | |
| "reward": 1.0125, | |
| "reward_std": 0.12816310189664365, | |
| "rewards/correctness_reward_func_factual": 0.078125, | |
| "rewards/format_reward_func_factual": 0.934375, | |
| "step": 50 | |
| }, | |
| { | |
| "completion_length": 144.015625, | |
| "epoch": 0.20460358056265984, | |
| "grad_norm": 0.06519610434770584, | |
| "kl": 0.001239983293635305, | |
| "learning_rate": 1.8204248194091425e-07, | |
| "loss": 0.0, | |
| "reward": 0.98125, | |
| "reward_std": 0.12816310189664365, | |
| "rewards/correctness_reward_func_factual": 0.05, | |
| "rewards/format_reward_func_factual": 0.93125, | |
| "step": 60 | |
| }, | |
| { | |
| "completion_length": 144.9734375, | |
| "epoch": 0.23870417732310314, | |
| "grad_norm": 0.055830299854278564, | |
| "kl": 0.0012718188016151543, | |
| "learning_rate": 1.0901074253727336e-07, | |
| "loss": 0.0, | |
| "reward": 0.9984375, | |
| "reward_std": 0.1524698968976736, | |
| "rewards/correctness_reward_func_factual": 0.0796875, | |
| "rewards/format_reward_func_factual": 0.91875, | |
| "step": 70 | |
| }, | |
| { | |
| "completion_length": 145.4859375, | |
| "epoch": 0.2728047740835465, | |
| "grad_norm": 0.4993789792060852, | |
| "kl": 0.0011785521975980374, | |
| "learning_rate": 5.0639297359319846e-08, | |
| "loss": 0.0, | |
| "reward": 1.003125, | |
| "reward_std": 0.13258251920342445, | |
| "rewards/correctness_reward_func_factual": 0.075, | |
| "rewards/format_reward_func_factual": 0.928125, | |
| "step": 80 | |
| }, | |
| { | |
| "completion_length": 145.9609375, | |
| "epoch": 0.3069053708439898, | |
| "grad_norm": 0.08759493380784988, | |
| "kl": 0.0012535244946775491, | |
| "learning_rate": 1.2997705010932391e-08, | |
| "loss": 0.0, | |
| "reward": 0.990625, | |
| "reward_std": 0.1414213538169861, | |
| "rewards/correctness_reward_func_factual": 0.078125, | |
| "rewards/format_reward_func_factual": 0.9125, | |
| "step": 90 | |
| }, | |
| { | |
| "completion_length": 146.890625, | |
| "epoch": 0.3410059676044331, | |
| "grad_norm": 0.5095328688621521, | |
| "kl": 0.0012054607454047074, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "reward": 1.021875, | |
| "reward_std": 0.11048543266952038, | |
| "rewards/correctness_reward_func_factual": 0.0765625, | |
| "rewards/format_reward_func_factual": 0.9453125, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3410059676044331, | |
| "step": 100, | |
| "total_flos": 0.0, | |
| "train_loss": 1.139334672188852e-06, | |
| "train_runtime": 11009.3263, | |
| "train_samples_per_second": 0.291, | |
| "train_steps_per_second": 0.009 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |