| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.1504, | |
| "eval_steps": 50, | |
| "global_step": 47, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0298828125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1387.8, | |
| "completions/mean_length": 117.6021484375, | |
| "completions/mean_terminated_length": 73.92077941894532, | |
| "completions/min_length": 1.0, | |
| "completions/min_terminated_length": 1.0, | |
| "epoch": 0.016, | |
| "grad_norm": 0.0009012475493364036, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0023, | |
| "num_tokens": 12904598.0, | |
| "reward": 0.003271484375, | |
| "reward_std": 0.016773892380297185, | |
| "rewards/accuracy_reward": 0.0001953125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.00634765625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.04052734375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1399.0, | |
| "completions/mean_length": 144.84462890625, | |
| "completions/mean_terminated_length": 86.13726043701172, | |
| "completions/min_length": 1.0, | |
| "completions/min_terminated_length": 1.0, | |
| "epoch": 0.032, | |
| "grad_norm": 0.005089475307613611, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0069, | |
| "num_tokens": 26344479.0, | |
| "reward": 0.02197265625, | |
| "reward_std": 0.0738394245505333, | |
| "rewards/accuracy_reward": 0.006640625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.0373046875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.05029296875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1470.0, | |
| "completions/mean_length": 218.273046875, | |
| "completions/mean_terminated_length": 148.43575439453124, | |
| "completions/min_length": 1.6, | |
| "completions/min_terminated_length": 1.6, | |
| "epoch": 0.048, | |
| "grad_norm": 0.003542242106050253, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0482, | |
| "num_tokens": 40484651.0, | |
| "reward": 0.2947265625, | |
| "reward_std": 0.2773519217967987, | |
| "rewards/accuracy_reward": 0.0857421875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.5037109375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0291015625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1409.0, | |
| "completions/mean_length": 187.0353515625, | |
| "completions/mean_terminated_length": 146.611474609375, | |
| "completions/min_length": 11.8, | |
| "completions/min_terminated_length": 11.8, | |
| "epoch": 0.064, | |
| "grad_norm": 0.0016335330437868834, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0656, | |
| "num_tokens": 54174613.0, | |
| "reward": 0.524755859375, | |
| "reward_std": 0.23584804832935333, | |
| "rewards/accuracy_reward": 0.17412109375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.875390625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0296875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1326.8, | |
| "completions/mean_length": 168.64677734375, | |
| "completions/mean_terminated_length": 126.81468353271484, | |
| "completions/min_length": 18.4, | |
| "completions/min_terminated_length": 18.4, | |
| "epoch": 0.08, | |
| "grad_norm": 0.001310898456722498, | |
| "learning_rate": 1e-06, | |
| "loss": 0.068, | |
| "num_tokens": 67691028.0, | |
| "reward": 0.577392578125, | |
| "reward_std": 0.1859208643436432, | |
| "rewards/accuracy_reward": 0.20341796875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9513671875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02001953125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1291.6, | |
| "completions/mean_length": 145.42451171875, | |
| "completions/mean_terminated_length": 117.01493835449219, | |
| "completions/min_length": 18.2, | |
| "completions/min_terminated_length": 18.2, | |
| "epoch": 0.096, | |
| "grad_norm": 0.001353453379124403, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0497, | |
| "num_tokens": 81081103.0, | |
| "reward": 0.6099609375, | |
| "reward_std": 0.15915196239948273, | |
| "rewards/accuracy_reward": 0.24267578125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.97724609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01162109375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1185.4, | |
| "completions/mean_length": 123.5869140625, | |
| "completions/mean_terminated_length": 106.98431701660157, | |
| "completions/min_length": 23.4, | |
| "completions/min_terminated_length": 23.4, | |
| "epoch": 0.112, | |
| "grad_norm": 0.0013827328803017735, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0296, | |
| "num_tokens": 94312425.0, | |
| "reward": 0.63564453125, | |
| "reward_std": 0.14015594720840455, | |
| "rewards/accuracy_reward": 0.2837890625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00498046875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 955.8, | |
| "completions/mean_length": 107.708203125, | |
| "completions/mean_terminated_length": 100.56294860839844, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "epoch": 0.128, | |
| "grad_norm": 0.0011756919557228684, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0155, | |
| "num_tokens": 107188349.0, | |
| "reward": 0.64580078125, | |
| "reward_std": 0.12910378873348236, | |
| "rewards/accuracy_reward": 0.29716796875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99443359375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.003515625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 936.2, | |
| "completions/mean_length": 101.2041015625, | |
| "completions/mean_terminated_length": 96.14408721923829, | |
| "completions/min_length": 23.4, | |
| "completions/min_terminated_length": 23.4, | |
| "epoch": 0.144, | |
| "grad_norm": 0.001248349086381495, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0119, | |
| "num_tokens": 120031431.0, | |
| "reward": 0.6857421875, | |
| "reward_std": 0.13882396817207338, | |
| "rewards/accuracy_reward": 0.3751953125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9962890625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0029296875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 787.0, | |
| "completions/mean_length": 102.634033203125, | |
| "completions/mean_terminated_length": 98.42235565185547, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "epoch": 0.1504, | |
| "num_tokens": 125106124.0, | |
| "reward": 0.6888427734375, | |
| "reward_std": 0.13154470920562744, | |
| "rewards/accuracy_reward": 0.380859375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.996826171875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 47, | |
| "total_flos": 0.0, | |
| "train_loss": 0.030232181475359075, | |
| "train_runtime": 7886.8615, | |
| "train_samples_per_second": 0.38, | |
| "train_steps_per_second": 0.006 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 47, | |
| "num_input_tokens_seen": 125106124, | |
| "num_train_epochs": 1, | |
| "save_steps": 60, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |