| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.40336134453781514, |
| "eval_steps": 500, |
| "global_step": 42, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.18055555555555558, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 8149.333333333333, |
| "completions/mean_length": 3217.178955078125, |
| "completions/mean_terminated_length": 2105.4993489583335, |
| "completions/min_length": 559.3333333333334, |
| "completions/min_terminated_length": 559.3333333333334, |
| "epoch": 0.028811524609843937, |
| "grad_norm": 0.09286145865917206, |
| "kl": 0.0001745025316874186, |
| "learning_rate": 2.942307692307692e-06, |
| "loss": 0.1215, |
| "num_tokens": 1897631.0, |
| "reward": 1.810763915379842, |
| "reward_std": 0.18809527655442557, |
| "rewards/accuracy_reward/mean": 0.8125, |
| "rewards/accuracy_reward/std": 0.3738736609617869, |
| "rewards/format_reward/mean": 0.9982638955116272, |
| "rewards/format_reward/std": 0.02405626078446706, |
| "step": 3 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.19791666666666666, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 7958.0, |
| "completions/mean_length": 3270.2118326822915, |
| "completions/mean_terminated_length": 2069.518107096354, |
| "completions/min_length": 492.0, |
| "completions/min_terminated_length": 492.0, |
| "epoch": 0.057623049219687875, |
| "grad_norm": 0.08364126831293106, |
| "kl": 0.0004324515660603841, |
| "learning_rate": 2.8557692307692307e-06, |
| "loss": 0.0934, |
| "num_tokens": 3822113.0, |
| "reward": 1.7673611640930176, |
| "reward_std": 0.14717517793178558, |
| "rewards/accuracy_reward/mean": 0.7673611044883728, |
| "rewards/accuracy_reward/std": 0.4205925464630127, |
| "rewards/format_reward/mean": 1.0, |
| "rewards/format_reward/std": 0.0, |
| "step": 6 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.234375, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 7983.666666666667, |
| "completions/mean_length": 3627.2797037760415, |
| "completions/mean_terminated_length": 2248.2770182291665, |
| "completions/min_length": 657.3333333333334, |
| "completions/min_terminated_length": 657.3333333333334, |
| "epoch": 0.08643457382953182, |
| "grad_norm": 0.09271353483200073, |
| "kl": 0.0014851093292236328, |
| "learning_rate": 2.7692307692307693e-06, |
| "loss": 0.1043, |
| "num_tokens": 5951186.0, |
| "reward": 1.73437503973643, |
| "reward_std": 0.1764681041240692, |
| "rewards/accuracy_reward/mean": 0.7361111044883728, |
| "rewards/accuracy_reward/std": 0.4373584985733032, |
| "rewards/format_reward/mean": 0.9982638955116272, |
| "rewards/format_reward/std": 0.02405626078446706, |
| "step": 9 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.11111111111111112, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 8012.666666666667, |
| "completions/mean_length": 2749.7119140625, |
| "completions/mean_terminated_length": 2069.81005859375, |
| "completions/min_length": 558.0, |
| "completions/min_terminated_length": 558.0, |
| "epoch": 0.11524609843937575, |
| "grad_norm": 0.06276765465736389, |
| "kl": 0.002723217010498047, |
| "learning_rate": 2.682692307692308e-06, |
| "loss": 0.0766, |
| "num_tokens": 7577284.0, |
| "reward": 1.8333333333333333, |
| "reward_std": 0.1446588784456253, |
| "rewards/accuracy_reward/mean": 0.8350694378217062, |
| "rewards/accuracy_reward/std": 0.36645371715227765, |
| "rewards/format_reward/mean": 0.9982638955116272, |
| "rewards/format_reward/std": 0.02405626078446706, |
| "step": 12 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.11805555555555558, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 8078.333333333333, |
| "completions/mean_length": 2837.998291015625, |
| "completions/mean_terminated_length": 2121.116943359375, |
| "completions/min_length": 586.0, |
| "completions/min_terminated_length": 586.0, |
| "epoch": 0.14405762304921968, |
| "grad_norm": 0.06373849511146545, |
| "kl": 0.0064856211344401045, |
| "learning_rate": 2.5961538461538465e-06, |
| "loss": 0.0795, |
| "num_tokens": 9254931.0, |
| "reward": 1.8125000794728596, |
| "reward_std": 0.13619043429692587, |
| "rewards/accuracy_reward/mean": 0.812500019868215, |
| "rewards/accuracy_reward/std": 0.3885917862256368, |
| "rewards/format_reward/mean": 1.0, |
| "rewards/format_reward/std": 0.0, |
| "step": 15 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.11284722222222225, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 8120.0, |
| "completions/mean_length": 2874.5538736979165, |
| "completions/mean_terminated_length": 2199.148152669271, |
| "completions/min_length": 438.6666666666667, |
| "completions/min_terminated_length": 438.6666666666667, |
| "epoch": 0.17286914765906364, |
| "grad_norm": 0.06650689989328384, |
| "kl": 0.01007843017578125, |
| "learning_rate": 2.5096153846153847e-06, |
| "loss": 0.0729, |
| "num_tokens": 10960010.0, |
| "reward": 1.7986111640930176, |
| "reward_std": 0.1752894123395284, |
| "rewards/accuracy_reward/mean": 0.7986111044883728, |
| "rewards/accuracy_reward/std": 0.3986728588740031, |
| "rewards/format_reward/mean": 1.0, |
| "rewards/format_reward/std": 0.0, |
| "step": 18 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.12673611111111108, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 8150.666666666667, |
| "completions/mean_length": 3212.4671223958335, |
| "completions/mean_terminated_length": 2483.940673828125, |
| "completions/min_length": 749.6666666666666, |
| "completions/min_terminated_length": 749.6666666666666, |
| "epoch": 0.20168067226890757, |
| "grad_norm": 0.061652038246393204, |
| "kl": 0.011129379272460938, |
| "learning_rate": 2.4230769230769233e-06, |
| "loss": 0.0656, |
| "num_tokens": 12855127.0, |
| "reward": 1.7256944974263508, |
| "reward_std": 0.19639561573664346, |
| "rewards/accuracy_reward/mean": 0.725694457689921, |
| "rewards/accuracy_reward/std": 0.4470636049906413, |
| "rewards/format_reward/mean": 1.0, |
| "rewards/format_reward/std": 0.0, |
| "step": 21 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08159722222222221, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 8009.0, |
| "completions/mean_length": 2894.3212076822915, |
| "completions/mean_terminated_length": 2424.7539876302085, |
| "completions/min_length": 455.6666666666667, |
| "completions/min_terminated_length": 455.6666666666667, |
| "epoch": 0.2304921968787515, |
| "grad_norm": 0.06359543651342392, |
| "kl": 0.013215382893880209, |
| "learning_rate": 2.3365384615384615e-06, |
| "loss": 0.0777, |
| "num_tokens": 14566568.0, |
| "reward": 1.7986111243565877, |
| "reward_std": 0.20375757416089377, |
| "rewards/accuracy_reward/mean": 0.7986111044883728, |
| "rewards/accuracy_reward/std": 0.3931320408980052, |
| "rewards/format_reward/mean": 1.0, |
| "rewards/format_reward/std": 0.0, |
| "step": 24 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.09201388888888891, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 7929.666666666667, |
| "completions/mean_length": 2709.375, |
| "completions/mean_terminated_length": 2146.6839599609375, |
| "completions/min_length": 373.6666666666667, |
| "completions/min_terminated_length": 373.6666666666667, |
| "epoch": 0.25930372148859543, |
| "grad_norm": 0.05772211030125618, |
| "kl": 0.017008463541666668, |
| "learning_rate": 2.25e-06, |
| "loss": 0.0681, |
| "num_tokens": 16187976.0, |
| "reward": 1.76562503973643, |
| "reward_std": 0.1697404384613037, |
| "rewards/accuracy_reward/mean": 0.765625, |
| "rewards/accuracy_reward/std": 0.4218848447004954, |
| "rewards/format_reward/mean": 1.0, |
| "rewards/format_reward/std": 0.0, |
| "step": 27 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.07638888888888888, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 8090.0, |
| "completions/mean_length": 2675.694580078125, |
| "completions/mean_terminated_length": 2217.5712076822915, |
| "completions/min_length": 401.6666666666667, |
| "completions/min_terminated_length": 401.6666666666667, |
| "epoch": 0.28811524609843936, |
| "grad_norm": 0.06288590282201767, |
| "kl": 0.020589192708333332, |
| "learning_rate": 2.1634615384615387e-06, |
| "loss": 0.0697, |
| "num_tokens": 17790312.0, |
| "reward": 1.7760417461395264, |
| "reward_std": 0.20130781332651773, |
| "rewards/accuracy_reward/mean": 0.7777777910232544, |
| "rewards/accuracy_reward/std": 0.4139314691225688, |
| "rewards/format_reward/mean": 0.9982638955116272, |
| "rewards/format_reward/std": 0.02405626078446706, |
| "step": 30 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.08680555555555554, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 8054.0, |
| "completions/mean_length": 2780.01220703125, |
| "completions/mean_terminated_length": 2265.2118326822915, |
| "completions/min_length": 392.6666666666667, |
| "completions/min_terminated_length": 392.6666666666667, |
| "epoch": 0.3169267707082833, |
| "grad_norm": 0.6698484420776367, |
| "kl": 0.03530248006184896, |
| "learning_rate": 2.076923076923077e-06, |
| "loss": 0.0537, |
| "num_tokens": 19449207.0, |
| "reward": 1.7899305820465088, |
| "reward_std": 0.18347221612930298, |
| "rewards/accuracy_reward/mean": 0.789930542310079, |
| "rewards/accuracy_reward/std": 0.40485529104868573, |
| "rewards/format_reward/mean": 1.0, |
| "rewards/format_reward/std": 0.0, |
| "step": 33 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.060763888888888916, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 7845.0, |
| "completions/mean_length": 2849.2032877604165, |
| "completions/mean_terminated_length": 2505.287841796875, |
| "completions/min_length": 393.3333333333333, |
| "completions/min_terminated_length": 393.3333333333333, |
| "epoch": 0.3457382953181273, |
| "grad_norm": 0.09857647120952606, |
| "kl": 0.032840728759765625, |
| "learning_rate": 1.9903846153846155e-06, |
| "loss": 0.0509, |
| "num_tokens": 21157860.0, |
| "reward": 1.717013955116272, |
| "reward_std": 0.22714433073997498, |
| "rewards/accuracy_reward/mean": 0.7170138955116272, |
| "rewards/accuracy_reward/std": 0.45049455761909485, |
| "rewards/format_reward/mean": 1.0, |
| "rewards/format_reward/std": 0.0, |
| "step": 36 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.060763888888888874, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 7866.0, |
| "completions/mean_length": 2628.4080403645835, |
| "completions/mean_terminated_length": 2266.409220377604, |
| "completions/min_length": 435.0, |
| "completions/min_terminated_length": 435.0, |
| "epoch": 0.3745498199279712, |
| "grad_norm": 2.40429949760437, |
| "kl": 0.058779398600260414, |
| "learning_rate": 1.9038461538461538e-06, |
| "loss": 0.0515, |
| "num_tokens": 22722487.0, |
| "reward": 1.7274306217829387, |
| "reward_std": 0.24683435757954916, |
| "rewards/accuracy_reward/mean": 0.7274305621782938, |
| "rewards/accuracy_reward/std": 0.4455043375492096, |
| "rewards/format_reward/mean": 1.0, |
| "rewards/format_reward/std": 0.0, |
| "step": 39 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.048611111111111126, |
| "completions/max_length": 8192.0, |
| "completions/max_terminated_length": 7949.0, |
| "completions/mean_length": 2644.7657063802085, |
| "completions/mean_terminated_length": 2360.8841959635415, |
| "completions/min_length": 295.3333333333333, |
| "completions/min_terminated_length": 295.3333333333333, |
| "epoch": 0.40336134453781514, |
| "grad_norm": 0.7801563739776611, |
| "kl": 0.0440673828125, |
| "learning_rate": 1.8173076923076922e-06, |
| "loss": 0.0578, |
| "num_tokens": 24298560.0, |
| "reward": 1.7239583730697632, |
| "reward_std": 0.24683218201001486, |
| "rewards/accuracy_reward/mean": 0.7239583333333334, |
| "rewards/accuracy_reward/std": 0.44757089018821716, |
| "rewards/format_reward/mean": 1.0, |
| "rewards/format_reward/std": 0.0, |
| "step": 42 |
| } |
| ], |
| "logging_steps": 3, |
| "max_steps": 104, |
| "num_input_tokens_seen": 24298560, |
| "num_train_epochs": 1, |
| "save_steps": 21, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|