| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9893390191897654, |
| "eval_steps": 100, |
| "global_step": 58, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "completion_length": 442.65068969726565, |
| "epoch": 0.08528784648187633, |
| "grad_norm": 6.35717248916626, |
| "kl": 0.0004929542541503906, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0, |
| "reward": 0.41696430593729017, |
| "reward_std": 0.4544539041817188, |
| "rewards/accuracy_reward": 0.12834822051227093, |
| "rewards/format_reward": 0.28861608617007734, |
| "step": 5 |
| }, |
| { |
| "completion_length": 304.9187644958496, |
| "epoch": 0.17057569296375266, |
| "grad_norm": 2.1602344512939453, |
| "kl": 0.026971435546875, |
| "learning_rate": 2.956412726139078e-06, |
| "loss": 0.0011, |
| "reward": 0.986160759627819, |
| "reward_std": 0.3372926861047745, |
| "rewards/accuracy_reward": 0.11272321967408061, |
| "rewards/format_reward": 0.873437537252903, |
| "step": 10 |
| }, |
| { |
| "completion_length": 365.3803741455078, |
| "epoch": 0.255863539445629, |
| "grad_norm": 0.33471256494522095, |
| "kl": 0.03214263916015625, |
| "learning_rate": 2.7836719084521715e-06, |
| "loss": 0.0013, |
| "reward": 1.298883980512619, |
| "reward_std": 0.41679972484707833, |
| "rewards/accuracy_reward": 0.37522323429584503, |
| "rewards/format_reward": 0.923660759627819, |
| "step": 15 |
| }, |
| { |
| "completion_length": 378.46050758361815, |
| "epoch": 0.3411513859275053, |
| "grad_norm": 1.4841564893722534, |
| "kl": 0.0420074462890625, |
| "learning_rate": 2.4946839873611927e-06, |
| "loss": 0.0017, |
| "reward": 1.4825893670320511, |
| "reward_std": 0.4033116206526756, |
| "rewards/accuracy_reward": 0.5584821693599225, |
| "rewards/format_reward": 0.9241071850061416, |
| "step": 20 |
| }, |
| { |
| "completion_length": 394.2165367126465, |
| "epoch": 0.42643923240938164, |
| "grad_norm": 0.28560909628868103, |
| "kl": 0.0350738525390625, |
| "learning_rate": 2.1156192081791355e-06, |
| "loss": 0.0014, |
| "reward": 1.512500062584877, |
| "reward_std": 0.3465679492801428, |
| "rewards/accuracy_reward": 0.5779018126428127, |
| "rewards/format_reward": 0.9345982566475868, |
| "step": 25 |
| }, |
| { |
| "completion_length": 401.2672065734863, |
| "epoch": 0.511727078891258, |
| "grad_norm": 0.5306402444839478, |
| "kl": 0.034088134765625, |
| "learning_rate": 1.6808050203829845e-06, |
| "loss": 0.0014, |
| "reward": 1.5180804193019868, |
| "reward_std": 0.3442438319325447, |
| "rewards/accuracy_reward": 0.5779018104076385, |
| "rewards/format_reward": 0.940178607404232, |
| "step": 30 |
| }, |
| { |
| "completion_length": 393.07434768676757, |
| "epoch": 0.5970149253731343, |
| "grad_norm": 0.2967917323112488, |
| "kl": 0.0385345458984375, |
| "learning_rate": 1.2296174432791415e-06, |
| "loss": 0.0015, |
| "reward": 1.5225447058677672, |
| "reward_std": 0.34006611779332163, |
| "rewards/accuracy_reward": 0.5747768118977546, |
| "rewards/format_reward": 0.9477679014205933, |
| "step": 35 |
| }, |
| { |
| "completion_length": 390.0587242126465, |
| "epoch": 0.6823027718550106, |
| "grad_norm": 0.358806312084198, |
| "kl": 0.0369537353515625, |
| "learning_rate": 8.029152419343472e-07, |
| "loss": 0.0015, |
| "reward": 1.572321492433548, |
| "reward_std": 0.313805010356009, |
| "rewards/accuracy_reward": 0.6136160999536514, |
| "rewards/format_reward": 0.9587054014205932, |
| "step": 40 |
| }, |
| { |
| "completion_length": 418.40091094970705, |
| "epoch": 0.767590618336887, |
| "grad_norm": 0.2598155736923218, |
| "kl": 0.03328857421875, |
| "learning_rate": 4.3933982822017883e-07, |
| "loss": 0.0013, |
| "reward": 1.5649554222822188, |
| "reward_std": 0.3114229992032051, |
| "rewards/accuracy_reward": 0.6140625312924385, |
| "rewards/format_reward": 0.9508929029107094, |
| "step": 45 |
| }, |
| { |
| "completion_length": 422.549129486084, |
| "epoch": 0.8528784648187633, |
| "grad_norm": 0.2509433925151825, |
| "kl": 0.034161376953125, |
| "learning_rate": 1.718159615201853e-07, |
| "loss": 0.0014, |
| "reward": 1.5645089954137803, |
| "reward_std": 0.3242972683161497, |
| "rewards/accuracy_reward": 0.6129464544355869, |
| "rewards/format_reward": 0.9515625432133674, |
| "step": 50 |
| }, |
| { |
| "completion_length": 419.257829284668, |
| "epoch": 0.9381663113006397, |
| "grad_norm": 0.3123314082622528, |
| "kl": 0.0333709716796875, |
| "learning_rate": 2.4570139579284723e-08, |
| "loss": 0.0013, |
| "reward": 1.5895090013742448, |
| "reward_std": 0.326204876601696, |
| "rewards/accuracy_reward": 0.6330357410013676, |
| "rewards/format_reward": 0.9564732536673546, |
| "step": 55 |
| }, |
| { |
| "completion_length": 415.0018781026204, |
| "epoch": 0.9893390191897654, |
| "kl": 0.03472900390625, |
| "reward": 1.5885417411724727, |
| "reward_std": 0.29818206280469894, |
| "rewards/accuracy_reward": 0.6279762176175913, |
| "rewards/format_reward": 0.9605655198295912, |
| "step": 58, |
| "total_flos": 0.0, |
| "train_loss": 0.0013394545973730192, |
| "train_runtime": 6585.1769, |
| "train_samples_per_second": 1.139, |
| "train_steps_per_second": 0.009 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 58, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|