| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.8, | |
| "eval_steps": 500, | |
| "global_step": 20, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 794.7712326049805, | |
| "epoch": 0.2, | |
| "grad_norm": 0.1648624688386917, | |
| "kl": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0129, | |
| "reward": 0.1897321529686451, | |
| "reward_std": 0.19452152773737907, | |
| "rewards/accuracy_reward": 0.16183036379516125, | |
| "rewards/format_reward": 0.027901787078008056, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 788.7571487426758, | |
| "epoch": 1.2, | |
| "grad_norm": 0.2475416511297226, | |
| "kl": 0.0007533133029937744, | |
| "learning_rate": 2.9392394604217463e-06, | |
| "loss": 0.024, | |
| "reward": 0.16434152494184673, | |
| "reward_std": 0.19889171607792377, | |
| "rewards/accuracy_reward": 0.13504464935977012, | |
| "rewards/format_reward": 0.029296876542503014, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 780.9437911987304, | |
| "epoch": 2.4, | |
| "grad_norm": 0.29467591643333435, | |
| "kl": 0.030749130249023437, | |
| "learning_rate": 2.3109612261833968e-06, | |
| "loss": 0.0394, | |
| "reward": 0.23705358430743217, | |
| "reward_std": 0.29603362139314415, | |
| "rewards/accuracy_reward": 0.13928571976721288, | |
| "rewards/format_reward": 0.09776786174625159, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 736.5603805541992, | |
| "epoch": 3.6, | |
| "grad_norm": 0.3751678168773651, | |
| "kl": 0.06479034423828126, | |
| "learning_rate": 1.2865277425900725e-06, | |
| "loss": 0.0791, | |
| "reward": 0.4093750208616257, | |
| "reward_std": 0.43266602158546447, | |
| "rewards/accuracy_reward": 0.12991072016302496, | |
| "rewards/format_reward": 0.2794642996042967, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 690.4044540405273, | |
| "epoch": 4.8, | |
| "grad_norm": 0.3264864385128021, | |
| "kl": 0.0665924072265625, | |
| "learning_rate": 3.6637563846861275e-07, | |
| "loss": 0.0782, | |
| "reward": 0.5441964529454708, | |
| "reward_std": 0.4915049530565739, | |
| "rewards/accuracy_reward": 0.118750005424954, | |
| "rewards/format_reward": 0.4254464492201805, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "step": 20, | |
| "total_flos": 0.0, | |
| "train_loss": 0.054626915324479344, | |
| "train_runtime": 6323.8598, | |
| "train_samples_per_second": 0.499, | |
| "train_steps_per_second": 0.004 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 25, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |