| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 100.0, | |
| "eval_steps": 10, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 941.515625, | |
| "epoch": 0.5, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5e-08, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 957.15625, | |
| "epoch": 1.0, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 947.609375, | |
| "epoch": 1.5, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.5e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 905.96875, | |
| "epoch": 2.0, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 934.28125, | |
| "epoch": 2.5, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.5e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 842.84375, | |
| "epoch": 3.0, | |
| "grad_norm": 0.4104181280494549, | |
| "kl": 0.0, | |
| "learning_rate": 3e-07, | |
| "loss": 0.0097, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1060.203125, | |
| "epoch": 3.5, | |
| "grad_norm": 0.006270841170194027, | |
| "kl": 0.0004673004150390625, | |
| "learning_rate": 3.5e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 846.78125, | |
| "epoch": 4.0, | |
| "grad_norm": 0.18416362793200072, | |
| "kl": 0.0004229545593261719, | |
| "learning_rate": 4e-07, | |
| "loss": 0.0438, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 939.875, | |
| "epoch": 4.5, | |
| "grad_norm": 0.004467489660870973, | |
| "kl": 0.00043773651123046875, | |
| "learning_rate": 4.5e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.003060223517037833, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 914.8203125, | |
| "eval_kl": 0.0004506111145019531, | |
| "eval_loss": 0.0222869124263525, | |
| "eval_reward": 0.0078125, | |
| "eval_reward_std": 0.03125, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0078125, | |
| "eval_runtime": 102.8292, | |
| "eval_samples_per_second": 0.078, | |
| "eval_steps_per_second": 0.01, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 931.875, | |
| "epoch": 5.5, | |
| "grad_norm": 0.0029719679687620396, | |
| "kl": 0.0004215240478515625, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 840.71875, | |
| "epoch": 6.0, | |
| "grad_norm": 0.003151997120265335, | |
| "kl": 0.00043010711669921875, | |
| "learning_rate": 6e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1021.125, | |
| "epoch": 6.5, | |
| "grad_norm": 0.00293567226380183, | |
| "kl": 0.00043010711669921875, | |
| "learning_rate": 6.5e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 901.53125, | |
| "epoch": 7.0, | |
| "grad_norm": 0.0029155273721110384, | |
| "kl": 0.0004425048828125, | |
| "learning_rate": 7e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 882.046875, | |
| "epoch": 7.5, | |
| "grad_norm": 0.24400359215564857, | |
| "kl": 0.00048542022705078125, | |
| "learning_rate": 7.5e-07, | |
| "loss": 0.0581, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 957.84375, | |
| "epoch": 8.0, | |
| "grad_norm": 0.0030236272302122703, | |
| "kl": 0.0004591941833496094, | |
| "learning_rate": 8e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 878.1875, | |
| "epoch": 8.5, | |
| "grad_norm": 0.005634063780769768, | |
| "kl": 0.0004887580871582031, | |
| "learning_rate": 8.499999999999999e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 840.09375, | |
| "epoch": 9.0, | |
| "grad_norm": 0.0033459555512032678, | |
| "kl": 0.0004935264587402344, | |
| "learning_rate": 9e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 949.34375, | |
| "epoch": 9.5, | |
| "grad_norm": 0.1871077676079831, | |
| "kl": 0.00047397613525390625, | |
| "learning_rate": 9.499999999999999e-07, | |
| "loss": 0.0603, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.48673171940966303, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0174, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 883.921875, | |
| "eval_kl": 0.00046753883361816406, | |
| "eval_loss": 1.7391932487953454e-05, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 98.7699, | |
| "eval_samples_per_second": 0.081, | |
| "eval_steps_per_second": 0.01, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 912.484375, | |
| "epoch": 10.5, | |
| "grad_norm": 0.3321658665461902, | |
| "kl": 0.0004425048828125, | |
| "learning_rate": 9.99931462820376e-07, | |
| "loss": 0.0141, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 749.5625, | |
| "epoch": 11.0, | |
| "grad_norm": 0.003786135169019501, | |
| "kl": 0.00048828125, | |
| "learning_rate": 9.997258721585931e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 927.234375, | |
| "epoch": 11.5, | |
| "grad_norm": 0.002638559729174169, | |
| "kl": 0.0004210472106933594, | |
| "learning_rate": 9.993832906395582e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 847.6875, | |
| "epoch": 12.0, | |
| "grad_norm": 0.0037353511628820143, | |
| "kl": 0.0004801750183105469, | |
| "learning_rate": 9.989038226169207e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 937.25, | |
| "epoch": 12.5, | |
| "grad_norm": 0.0041230093967703964, | |
| "kl": 0.00046634674072265625, | |
| "learning_rate": 9.982876141412855e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 936.625, | |
| "epoch": 13.0, | |
| "grad_norm": 0.0048191989969733635, | |
| "kl": 0.0005083084106445312, | |
| "learning_rate": 9.975348529157229e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 860.578125, | |
| "epoch": 13.5, | |
| "grad_norm": 0.004235845464067776, | |
| "kl": 0.00048828125, | |
| "learning_rate": 9.96645768238595e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 842.3125, | |
| "epoch": 14.0, | |
| "grad_norm": 0.0035023722132941533, | |
| "kl": 0.0004673004150390625, | |
| "learning_rate": 9.956206309337066e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 876.859375, | |
| "epoch": 14.5, | |
| "grad_norm": 0.0054294046901942426, | |
| "kl": 0.0005173683166503906, | |
| "learning_rate": 9.944597532678119e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.0031965438782979136, | |
| "learning_rate": 9.931634888554935e-07, | |
| "loss": 0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 986.3515625, | |
| "eval_kl": 0.0004820823669433594, | |
| "eval_loss": 1.6995334590319544e-05, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 108.5726, | |
| "eval_samples_per_second": 0.074, | |
| "eval_steps_per_second": 0.009, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 900.5234375, | |
| "epoch": 15.5, | |
| "grad_norm": 0.004036025288001792, | |
| "kl": 0.000476837158203125, | |
| "learning_rate": 9.917322325514487e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 934.3125, | |
| "epoch": 16.0, | |
| "grad_norm": 0.006195144986033682, | |
| "kl": 0.0006299018859863281, | |
| "learning_rate": 9.901664203302124e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 920.390625, | |
| "epoch": 16.5, | |
| "grad_norm": 0.0059456824839291, | |
| "kl": 0.0005106925964355469, | |
| "learning_rate": 9.88466529153356e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 879.125, | |
| "epoch": 17.0, | |
| "grad_norm": 0.21435517887811947, | |
| "kl": 0.000614166259765625, | |
| "learning_rate": 9.866330768241983e-07, | |
| "loss": 0.0454, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 871.34375, | |
| "epoch": 17.5, | |
| "grad_norm": 0.0066464547863155045, | |
| "kl": 0.0006079673767089844, | |
| "learning_rate": 9.846666218300807e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 802.6875, | |
| "epoch": 18.0, | |
| "grad_norm": 0.2319706301168723, | |
| "kl": 0.0005307197570800781, | |
| "learning_rate": 9.825677631722435e-07, | |
| "loss": 0.0365, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 898.46875, | |
| "epoch": 18.5, | |
| "grad_norm": 0.007249327675941041, | |
| "kl": 0.0006723403930664062, | |
| "learning_rate": 9.80337140183366e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 902.125, | |
| "epoch": 19.0, | |
| "grad_norm": 0.007027408347733654, | |
| "kl": 0.0005617141723632812, | |
| "learning_rate": 9.779754323328192e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 876.8125, | |
| "epoch": 19.5, | |
| "grad_norm": 0.24622076919830327, | |
| "kl": 0.0005426406860351562, | |
| "learning_rate": 9.754833590196926e-07, | |
| "loss": 0.0332, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.0061991261734385215, | |
| "learning_rate": 9.728616793536587e-07, | |
| "loss": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 896.96875, | |
| "eval_kl": 0.0005970001220703125, | |
| "eval_loss": 2.167217098758556e-05, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 103.8756, | |
| "eval_samples_per_second": 0.077, | |
| "eval_steps_per_second": 0.01, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 826.8203125, | |
| "epoch": 20.5, | |
| "grad_norm": 0.36000165453483507, | |
| "kl": 0.0005981922149658203, | |
| "learning_rate": 9.701111919237408e-07, | |
| "loss": 0.0126, | |
| "reward": 0.0078125, | |
| "reward_std": 0.03125, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0078125, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 848.5, | |
| "epoch": 21.0, | |
| "grad_norm": 0.010730892722707015, | |
| "kl": 0.0006737709045410156, | |
| "learning_rate": 9.672327345550543e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 815.03125, | |
| "epoch": 21.5, | |
| "grad_norm": 0.4479793536674205, | |
| "kl": 0.00072479248046875, | |
| "learning_rate": 9.64227184053598e-07, | |
| "loss": 0.0694, | |
| "reward": 0.03125, | |
| "reward_std": 0.125, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.03125, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1039.59375, | |
| "epoch": 22.0, | |
| "grad_norm": 0.005097432717583717, | |
| "kl": 0.0005826950073242188, | |
| "learning_rate": 9.610954559391704e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 913.5, | |
| "epoch": 22.5, | |
| "grad_norm": 0.005095664845686755, | |
| "kl": 0.0005693435668945312, | |
| "learning_rate": 9.578385041664925e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 982.3125, | |
| "epoch": 23.0, | |
| "grad_norm": 0.008066078534943774, | |
| "kl": 0.0007781982421875, | |
| "learning_rate": 9.54457320834625e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 940.015625, | |
| "epoch": 23.5, | |
| "grad_norm": 0.007637537595000865, | |
| "kl": 0.0008058547973632812, | |
| "learning_rate": 9.509529358847654e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 968.125, | |
| "epoch": 24.0, | |
| "grad_norm": 0.3097871453627547, | |
| "kl": 0.0007429122924804688, | |
| "learning_rate": 9.473264167865171e-07, | |
| "loss": 0.0144, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1062.859375, | |
| "epoch": 24.5, | |
| "grad_norm": 0.005902735175410845, | |
| "kl": 0.0006656646728515625, | |
| "learning_rate": 9.43578868212728e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.007727395228257341, | |
| "learning_rate": 9.397114317029974e-07, | |
| "loss": 0.0, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 896.96875, | |
| "eval_kl": 0.000972747802734375, | |
| "eval_loss": 2.9567998353741132e-05, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 112.2758, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.009, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 945.0, | |
| "epoch": 25.5, | |
| "grad_norm": 0.008522169760744136, | |
| "kl": 0.0008330345153808594, | |
| "learning_rate": 9.357252853159505e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 916.71875, | |
| "epoch": 26.0, | |
| "grad_norm": 0.007229054230337371, | |
| "kl": 0.0007457733154296875, | |
| "learning_rate": 9.316216432703916e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 848.4375, | |
| "epoch": 26.5, | |
| "grad_norm": 0.009711666023450752, | |
| "kl": 0.00109100341796875, | |
| "learning_rate": 9.274017555754407e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1112.53125, | |
| "epoch": 27.0, | |
| "grad_norm": 0.007287564769621722, | |
| "kl": 0.0007829666137695312, | |
| "learning_rate": 9.230669076497687e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 901.5625, | |
| "epoch": 27.5, | |
| "grad_norm": 0.010695472961060359, | |
| "kl": 0.0011224746704101562, | |
| "learning_rate": 9.186184199300463e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1000.40625, | |
| "epoch": 28.0, | |
| "grad_norm": 0.012234323967281642, | |
| "kl": 0.0009059906005859375, | |
| "learning_rate": 9.140576474687263e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1010.1875, | |
| "epoch": 28.5, | |
| "grad_norm": 0.009177302889350094, | |
| "kl": 0.0009927749633789062, | |
| "learning_rate": 9.093859795212817e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 910.34375, | |
| "epoch": 29.0, | |
| "grad_norm": 0.011497131889061145, | |
| "kl": 0.0014095306396484375, | |
| "learning_rate": 9.046048391230247e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 846.640625, | |
| "epoch": 29.5, | |
| "grad_norm": 0.009586334839630907, | |
| "kl": 0.001155853271484375, | |
| "learning_rate": 8.997156826556369e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.01647829222650522, | |
| "learning_rate": 8.9471999940354e-07, | |
| "loss": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 907.8828125, | |
| "eval_kl": 0.0010533332824707031, | |
| "eval_loss": 0.021646613255143166, | |
| "eval_reward": 0.0078125, | |
| "eval_reward_std": 0.03125, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0078125, | |
| "eval_runtime": 107.6329, | |
| "eval_samples_per_second": 0.074, | |
| "eval_steps_per_second": 0.009, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 920.4375, | |
| "epoch": 30.5, | |
| "grad_norm": 0.0168591367199892, | |
| "kl": 0.0012812614440917969, | |
| "learning_rate": 8.896193111002475e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 970.28125, | |
| "epoch": 31.0, | |
| "grad_norm": 0.008710794047022944, | |
| "kl": 0.0010786056518554688, | |
| "learning_rate": 8.844151714648274e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 957.59375, | |
| "epoch": 31.5, | |
| "grad_norm": 0.012243414806160322, | |
| "kl": 0.0009698867797851562, | |
| "learning_rate": 8.791091657286267e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 855.59375, | |
| "epoch": 32.0, | |
| "grad_norm": 0.015193617397552813, | |
| "kl": 0.001148223876953125, | |
| "learning_rate": 8.737029101523929e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1009.546875, | |
| "epoch": 32.5, | |
| "grad_norm": 0.007369522915137431, | |
| "kl": 0.0008792877197265625, | |
| "learning_rate": 8.681980515339463e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 868.84375, | |
| "epoch": 33.0, | |
| "grad_norm": 0.015494723417675436, | |
| "kl": 0.0012235641479492188, | |
| "learning_rate": 8.625962667065487e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 877.046875, | |
| "epoch": 33.5, | |
| "grad_norm": 0.022596736280910343, | |
| "kl": 0.0012197494506835938, | |
| "learning_rate": 8.568992620281243e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1001.34375, | |
| "epoch": 34.0, | |
| "grad_norm": 0.005352216682577747, | |
| "kl": 0.0008544921875, | |
| "learning_rate": 8.511087728614862e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 924.84375, | |
| "epoch": 34.5, | |
| "grad_norm": 0.005953464155512391, | |
| "kl": 0.000751495361328125, | |
| "learning_rate": 8.452265630457282e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "grad_norm": 0.016277308014848297, | |
| "learning_rate": 8.392544243589427e-07, | |
| "loss": 0.0, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 864.765625, | |
| "eval_kl": 0.00107574462890625, | |
| "eval_loss": 3.6555644328473136e-05, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 103.6058, | |
| "eval_samples_per_second": 0.077, | |
| "eval_steps_per_second": 0.01, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 888.2890625, | |
| "epoch": 35.5, | |
| "grad_norm": 0.00851896616755137, | |
| "kl": 0.0011534690856933594, | |
| "learning_rate": 8.331941759724268e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 894.71875, | |
| "epoch": 36.0, | |
| "grad_norm": 0.007086845304726659, | |
| "kl": 0.0008459091186523438, | |
| "learning_rate": 8.270476638965461e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 987.375, | |
| "epoch": 36.5, | |
| "grad_norm": 0.005738334037241088, | |
| "kl": 0.0007448196411132812, | |
| "learning_rate": 8.208167604184217e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 812.09375, | |
| "epoch": 37.0, | |
| "grad_norm": 0.01060348535065019, | |
| "kl": 0.00112152099609375, | |
| "learning_rate": 8.145033635316128e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 858.890625, | |
| "epoch": 37.5, | |
| "grad_norm": 0.25436864848634955, | |
| "kl": 0.0011377334594726562, | |
| "learning_rate": 8.081093963579707e-07, | |
| "loss": 0.0323, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 940.40625, | |
| "epoch": 38.0, | |
| "grad_norm": 0.006724100368173609, | |
| "kl": 0.0008859634399414062, | |
| "learning_rate": 8.01636806561836e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 895.34375, | |
| "epoch": 38.5, | |
| "grad_norm": 0.008819033954111552, | |
| "kl": 0.0010356903076171875, | |
| "learning_rate": 7.950875657567621e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 974.9375, | |
| "epoch": 39.0, | |
| "grad_norm": 0.013437649890405814, | |
| "kl": 0.0010128021240234375, | |
| "learning_rate": 7.884636689049422e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 868.8125, | |
| "epoch": 39.5, | |
| "grad_norm": 0.36102678136071253, | |
| "kl": 0.00128936767578125, | |
| "learning_rate": 7.817671337095244e-07, | |
| "loss": 0.0071, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 0.005225754916645614, | |
| "learning_rate": 7.75e-07, | |
| "loss": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 863.53125, | |
| "eval_kl": 0.0010199546813964844, | |
| "eval_loss": 3.5483633837429807e-05, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 93.6372, | |
| "eval_samples_per_second": 0.085, | |
| "eval_steps_per_second": 0.011, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 945.6015625, | |
| "epoch": 40.5, | |
| "grad_norm": 0.3853250090014461, | |
| "kl": 0.0010247230529785156, | |
| "learning_rate": 7.681643291108517e-07, | |
| "loss": 0.0662, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 928.375, | |
| "epoch": 41.0, | |
| "grad_norm": 0.005636824414465673, | |
| "kl": 0.0008144378662109375, | |
| "learning_rate": 7.612622032536507e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 928.90625, | |
| "epoch": 41.5, | |
| "grad_norm": 0.009695421562075205, | |
| "kl": 0.001068115234375, | |
| "learning_rate": 7.54295724882796e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 963.625, | |
| "epoch": 42.0, | |
| "grad_norm": 0.009278458906845803, | |
| "kl": 0.0011796951293945312, | |
| "learning_rate": 7.472670160550848e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 971.390625, | |
| "epoch": 42.5, | |
| "grad_norm": 0.007871724093899331, | |
| "kl": 0.0010309219360351562, | |
| "learning_rate": 7.401782177833147e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 665.90625, | |
| "epoch": 43.0, | |
| "grad_norm": 0.2548433246597271, | |
| "kl": 0.0026493072509765625, | |
| "learning_rate": 7.330314893841101e-07, | |
| "loss": 0.0591, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 832.6875, | |
| "epoch": 43.5, | |
| "grad_norm": 0.008654348640434956, | |
| "kl": 0.0010986328125, | |
| "learning_rate": 7.258290078201731e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 971.375, | |
| "epoch": 44.0, | |
| "grad_norm": 0.23717723076741856, | |
| "kl": 0.0013666152954101562, | |
| "learning_rate": 7.185729670371604e-07, | |
| "loss": 0.0375, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 962.234375, | |
| "epoch": 44.5, | |
| "grad_norm": 0.015377158658563683, | |
| "kl": 0.0014400482177734375, | |
| "learning_rate": 7.11265577295385e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "grad_norm": 0.008932566857599763, | |
| "learning_rate": 7.039090644965509e-07, | |
| "loss": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 926.8125, | |
| "eval_kl": 0.001495361328125, | |
| "eval_loss": 5.114699524710886e-05, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 99.6982, | |
| "eval_samples_per_second": 0.08, | |
| "eval_steps_per_second": 0.01, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 893.046875, | |
| "epoch": 45.5, | |
| "grad_norm": 0.010366882749113264, | |
| "kl": 0.0011758804321289062, | |
| "learning_rate": 6.965056695057204e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 904.78125, | |
| "epoch": 46.0, | |
| "grad_norm": 0.02599396238517655, | |
| "kl": 0.0017518997192382812, | |
| "learning_rate": 6.890576474687263e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 848.65625, | |
| "epoch": 46.5, | |
| "grad_norm": 0.5691502068122966, | |
| "kl": 0.0011138916015625, | |
| "learning_rate": 6.815672671252315e-07, | |
| "loss": -0.1031, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 896.9375, | |
| "epoch": 47.0, | |
| "grad_norm": 0.011737006406008286, | |
| "kl": 0.00153350830078125, | |
| "learning_rate": 6.740368101176495e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 845.0625, | |
| "epoch": 47.5, | |
| "grad_norm": 0.387460398723794, | |
| "kl": 0.0025997161865234375, | |
| "learning_rate": 6.664685702961344e-07, | |
| "loss": 0.1011, | |
| "reward": 0.046875, | |
| "reward_std": 0.14789125323295593, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.046875, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 825.6875, | |
| "epoch": 48.0, | |
| "grad_norm": 0.3148662082343754, | |
| "kl": 0.0015659332275390625, | |
| "learning_rate": 6.588648530198504e-07, | |
| "loss": -0.0819, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 836.265625, | |
| "epoch": 48.5, | |
| "grad_norm": 0.010907755471241348, | |
| "kl": 0.0014362335205078125, | |
| "learning_rate": 6.512279744547392e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 875.25, | |
| "epoch": 49.0, | |
| "grad_norm": 0.012995813252816308, | |
| "kl": 0.0014848709106445312, | |
| "learning_rate": 6.435602608679916e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 940.625, | |
| "epoch": 49.5, | |
| "grad_norm": 0.012426286814278077, | |
| "kl": 0.00176239013671875, | |
| "learning_rate": 6.358640479194451e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "grad_norm": 0.01574734631355814, | |
| "learning_rate": 6.281416799501187e-07, | |
| "loss": 0.0001, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 877.0859375, | |
| "eval_kl": 0.00176239013671875, | |
| "eval_loss": 5.840706580784172e-05, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 104.8754, | |
| "eval_samples_per_second": 0.076, | |
| "eval_steps_per_second": 0.01, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 893.6875, | |
| "epoch": 50.5, | |
| "grad_norm": 0.01032773418561725, | |
| "kl": 0.0016312599182128906, | |
| "learning_rate": 6.203955092681039e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 849.125, | |
| "epoch": 51.0, | |
| "grad_norm": 0.015022844032421302, | |
| "kl": 0.0020303726196289062, | |
| "learning_rate": 6.126278954320294e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 989.890625, | |
| "epoch": 51.5, | |
| "grad_norm": 0.01614919998047259, | |
| "kl": 0.00213623046875, | |
| "learning_rate": 6.048412045323164e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 942.59375, | |
| "epoch": 52.0, | |
| "grad_norm": 0.010917168106263793, | |
| "kl": 0.0014476776123046875, | |
| "learning_rate": 5.97037808470444e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 944.640625, | |
| "epoch": 52.5, | |
| "grad_norm": 0.26063398005499927, | |
| "kl": 0.002063751220703125, | |
| "learning_rate": 5.892200842364462e-07, | |
| "loss": 0.0398, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 859.4375, | |
| "epoch": 53.0, | |
| "grad_norm": 0.3786401255845219, | |
| "kl": 0.0019893646240234375, | |
| "learning_rate": 5.813904131848564e-07, | |
| "loss": 0.0037, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 982.65625, | |
| "epoch": 53.5, | |
| "grad_norm": 0.017106845969968518, | |
| "kl": 0.0019397735595703125, | |
| "learning_rate": 5.735511803093248e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 764.34375, | |
| "epoch": 54.0, | |
| "grad_norm": 0.01473506964168249, | |
| "kl": 0.0020160675048828125, | |
| "learning_rate": 5.657047735161255e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 908.171875, | |
| "epoch": 54.5, | |
| "grad_norm": 0.018754498587991306, | |
| "kl": 0.0021953582763671875, | |
| "learning_rate": 5.578535828967777e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "grad_norm": 0.010670548584562288, | |
| "learning_rate": 5.5e-07, | |
| "loss": 0.0001, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 948.3125, | |
| "eval_kl": 0.0020236968994140625, | |
| "eval_loss": 6.496578134829178e-05, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 100.0071, | |
| "eval_samples_per_second": 0.08, | |
| "eval_steps_per_second": 0.01, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 990.34375, | |
| "epoch": 55.5, | |
| "grad_norm": 0.015563623719699342, | |
| "kl": 0.0019483566284179688, | |
| "learning_rate": 5.421464171032224e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 855.28125, | |
| "epoch": 56.0, | |
| "grad_norm": 0.008970140876741549, | |
| "kl": 0.001338958740234375, | |
| "learning_rate": 5.342952264838747e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 982.640625, | |
| "epoch": 56.5, | |
| "grad_norm": 0.0108924950539754, | |
| "kl": 0.0016727447509765625, | |
| "learning_rate": 5.264488196906752e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 903.78125, | |
| "epoch": 57.0, | |
| "grad_norm": 0.2686051732486419, | |
| "kl": 0.0024242401123046875, | |
| "learning_rate": 5.186095868151436e-07, | |
| "loss": 0.0391, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 853.375, | |
| "epoch": 57.5, | |
| "grad_norm": 0.2831838787851241, | |
| "kl": 0.0024967193603515625, | |
| "learning_rate": 5.107799157635538e-07, | |
| "loss": 0.0351, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 917.34375, | |
| "epoch": 58.0, | |
| "grad_norm": 0.013530004501295191, | |
| "kl": 0.001804351806640625, | |
| "learning_rate": 5.02962191529556e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 859.03125, | |
| "epoch": 58.5, | |
| "grad_norm": 0.44797396450129406, | |
| "kl": 0.0030193328857421875, | |
| "learning_rate": 4.951587954676837e-07, | |
| "loss": -0.0663, | |
| "reward": 0.03125, | |
| "reward_std": 0.08539125323295593, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.03125, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 896.625, | |
| "epoch": 59.0, | |
| "grad_norm": 0.00998304849357523, | |
| "kl": 0.0016765594482421875, | |
| "learning_rate": 4.873721045679706e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 954.359375, | |
| "epoch": 59.5, | |
| "grad_norm": 0.22962099915558717, | |
| "kl": 0.0020122528076171875, | |
| "learning_rate": 4.79604490731896e-07, | |
| "loss": 0.0424, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "grad_norm": 0.02248418123929179, | |
| "learning_rate": 4.7185832004988133e-07, | |
| "loss": 0.0001, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 960.3671875, | |
| "eval_kl": 0.0021600723266601562, | |
| "eval_loss": 7.169261516537517e-05, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 107.7429, | |
| "eval_samples_per_second": 0.074, | |
| "eval_steps_per_second": 0.009, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 837.0390625, | |
| "epoch": 60.5, | |
| "grad_norm": 0.017761869908686022, | |
| "kl": 0.0027360916137695312, | |
| "learning_rate": 4.641359520805548e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 822.75, | |
| "epoch": 61.0, | |
| "grad_norm": 0.016374329005476508, | |
| "kl": 0.00225830078125, | |
| "learning_rate": 4.5643973913200837e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 873.46875, | |
| "epoch": 61.5, | |
| "grad_norm": 0.22223040398437557, | |
| "kl": 0.0031833648681640625, | |
| "learning_rate": 4.4877202554526084e-07, | |
| "loss": 0.0424, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1016.71875, | |
| "epoch": 62.0, | |
| "grad_norm": 0.01056012461452998, | |
| "kl": 0.001789093017578125, | |
| "learning_rate": 4.4113514698014953e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 867.015625, | |
| "epoch": 62.5, | |
| "grad_norm": 0.012502277305124584, | |
| "kl": 0.0022029876708984375, | |
| "learning_rate": 4.3353142970386557e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1008.1875, | |
| "epoch": 63.0, | |
| "grad_norm": 0.017467667097429012, | |
| "kl": 0.002529144287109375, | |
| "learning_rate": 4.2596318988235037e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 876.40625, | |
| "epoch": 63.5, | |
| "grad_norm": 0.27826851686759685, | |
| "kl": 0.003124237060546875, | |
| "learning_rate": 4.1843273287476854e-07, | |
| "loss": 0.0208, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 931.4375, | |
| "epoch": 64.0, | |
| "grad_norm": 0.010111725537803692, | |
| "kl": 0.0016422271728515625, | |
| "learning_rate": 4.1094235253127374e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 940.890625, | |
| "epoch": 64.5, | |
| "grad_norm": 0.19504698473227727, | |
| "kl": 0.003879547119140625, | |
| "learning_rate": 4.034943304942796e-07, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "grad_norm": 0.3594474052537692, | |
| "learning_rate": 3.9609093550344907e-07, | |
| "loss": -0.031, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 872.6484375, | |
| "eval_kl": 0.0025424957275390625, | |
| "eval_loss": -0.011794866062700748, | |
| "eval_reward": 0.015625, | |
| "eval_reward_std": 0.042695626616477966, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.015625, | |
| "eval_runtime": 95.6188, | |
| "eval_samples_per_second": 0.084, | |
| "eval_steps_per_second": 0.01, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 827.2578125, | |
| "epoch": 65.5, | |
| "grad_norm": 0.031146258604450788, | |
| "kl": 0.002330780029296875, | |
| "learning_rate": 3.8873442270461485e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0078125, | |
| "reward_std": 0.03125, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0078125, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1005.28125, | |
| "epoch": 66.0, | |
| "grad_norm": 0.23823090391570464, | |
| "kl": 0.00301361083984375, | |
| "learning_rate": 3.8142703296283953e-07, | |
| "loss": 0.0414, | |
| "reward": 0.03125, | |
| "reward_std": 0.08539125323295593, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.03125, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 965.671875, | |
| "epoch": 66.5, | |
| "grad_norm": 0.016282343372095484, | |
| "kl": 0.0027065277099609375, | |
| "learning_rate": 3.7417099217982686e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 950.9375, | |
| "epoch": 67.0, | |
| "grad_norm": 0.37418953088110135, | |
| "kl": 0.00205230712890625, | |
| "learning_rate": 3.6696851061588994e-07, | |
| "loss": 0.0608, | |
| "reward": 0.03125, | |
| "reward_std": 0.125, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.03125, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 911.234375, | |
| "epoch": 67.5, | |
| "grad_norm": 0.012255261965707507, | |
| "kl": 0.0020465850830078125, | |
| "learning_rate": 3.5982178221668533e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 979.71875, | |
| "epoch": 68.0, | |
| "grad_norm": 0.26206279671926186, | |
| "kl": 0.002727508544921875, | |
| "learning_rate": 3.5273298394491515e-07, | |
| "loss": 0.0389, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 933.796875, | |
| "epoch": 68.5, | |
| "grad_norm": 0.01763374341802393, | |
| "kl": 0.002559661865234375, | |
| "learning_rate": 3.45704275117204e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 905.125, | |
| "epoch": 69.0, | |
| "grad_norm": 0.3004009853199538, | |
| "kl": 0.002872467041015625, | |
| "learning_rate": 3.387377967463493e-07, | |
| "loss": 0.0281, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 951.5625, | |
| "epoch": 69.5, | |
| "grad_norm": 0.0130597651929469, | |
| "kl": 0.0021648406982421875, | |
| "learning_rate": 3.3183567088914833e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "grad_norm": 0.01647973523518642, | |
| "learning_rate": 3.250000000000001e-07, | |
| "loss": 0.0001, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 888.59375, | |
| "eval_kl": 0.0026531219482421875, | |
| "eval_loss": 0.02596096135675907, | |
| "eval_reward": 0.0078125, | |
| "eval_reward_std": 0.03125, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0078125, | |
| "eval_runtime": 98.3732, | |
| "eval_samples_per_second": 0.081, | |
| "eval_steps_per_second": 0.01, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 873.234375, | |
| "epoch": 70.5, | |
| "grad_norm": 0.2834208593274132, | |
| "kl": 0.0033597946166992188, | |
| "learning_rate": 3.182328662904756e-07, | |
| "loss": 0.0572, | |
| "reward": 0.015625, | |
| "reward_std": 0.042695626616477966, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1037.5625, | |
| "epoch": 71.0, | |
| "grad_norm": 0.010414710134713713, | |
| "kl": 0.0019664764404296875, | |
| "learning_rate": 3.115363310950578e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 931.96875, | |
| "epoch": 71.5, | |
| "grad_norm": 0.27576871169806716, | |
| "kl": 0.0029125213623046875, | |
| "learning_rate": 3.0491243424323783e-07, | |
| "loss": 0.0371, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 892.28125, | |
| "epoch": 72.0, | |
| "grad_norm": 0.3229311945497025, | |
| "kl": 0.002956390380859375, | |
| "learning_rate": 2.9836319343816397e-07, | |
| "loss": -0.0668, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 794.59375, | |
| "epoch": 72.5, | |
| "grad_norm": 0.24794430012534288, | |
| "kl": 0.00383758544921875, | |
| "learning_rate": 2.918906036420294e-07, | |
| "loss": 0.0331, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 893.875, | |
| "epoch": 73.0, | |
| "grad_norm": 0.018126291722898823, | |
| "kl": 0.00266265869140625, | |
| "learning_rate": 2.854966364683872e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 954.375, | |
| "epoch": 73.5, | |
| "grad_norm": 0.2235013065843689, | |
| "kl": 0.002880096435546875, | |
| "learning_rate": 2.791832395815782e-07, | |
| "loss": 0.0445, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 804.0, | |
| "epoch": 74.0, | |
| "grad_norm": 0.3025438980959449, | |
| "kl": 0.003345489501953125, | |
| "learning_rate": 2.729523361034538e-07, | |
| "loss": 0.0521, | |
| "reward": 0.03125, | |
| "reward_std": 0.08539125323295593, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.03125, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 971.578125, | |
| "epoch": 74.5, | |
| "grad_norm": 0.024475677293083028, | |
| "kl": 0.0037384033203125, | |
| "learning_rate": 2.6680582402757324e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "grad_norm": 0.012017701370876262, | |
| "learning_rate": 2.6074557564105724e-07, | |
| "loss": 0.0001, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 862.15625, | |
| "eval_kl": 0.0033111572265625, | |
| "eval_loss": 0.02151690237224102, | |
| "eval_reward": 0.0078125, | |
| "eval_reward_std": 0.03125, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0078125, | |
| "eval_runtime": 95.2607, | |
| "eval_samples_per_second": 0.084, | |
| "eval_steps_per_second": 0.01, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1021.8515625, | |
| "epoch": 75.5, | |
| "grad_norm": 0.344266822655093, | |
| "kl": 0.002719879150390625, | |
| "learning_rate": 2.547734369542718e-07, | |
| "loss": 0.0005, | |
| "reward": 0.015625, | |
| "reward_std": 0.042695626616477966, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 840.5, | |
| "epoch": 76.0, | |
| "grad_norm": 0.23923411010062942, | |
| "kl": 0.00366973876953125, | |
| "learning_rate": 2.488912271385139e-07, | |
| "loss": 0.042, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 944.609375, | |
| "epoch": 76.5, | |
| "grad_norm": 0.016984013306757723, | |
| "kl": 0.00287628173828125, | |
| "learning_rate": 2.4310073797187573e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 980.09375, | |
| "epoch": 77.0, | |
| "grad_norm": 0.013783286152037033, | |
| "kl": 0.002635955810546875, | |
| "learning_rate": 2.374037332934512e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 877.71875, | |
| "epoch": 77.5, | |
| "grad_norm": 0.17935562272538372, | |
| "kl": 0.003742218017578125, | |
| "learning_rate": 2.3180194846605364e-07, | |
| "loss": 0.0379, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 982.125, | |
| "epoch": 78.0, | |
| "grad_norm": 0.22095835889243898, | |
| "kl": 0.00313568115234375, | |
| "learning_rate": 2.2629708984760706e-07, | |
| "loss": 0.0429, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 853.6875, | |
| "epoch": 78.5, | |
| "grad_norm": 0.014922010725132247, | |
| "kl": 0.00323486328125, | |
| "learning_rate": 2.2089083427137329e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 725.46875, | |
| "epoch": 79.0, | |
| "grad_norm": 0.02027511748538313, | |
| "kl": 0.003879547119140625, | |
| "learning_rate": 2.1558482853517253e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 792.5, | |
| "epoch": 79.5, | |
| "grad_norm": 0.27874692769469644, | |
| "kl": 0.00482940673828125, | |
| "learning_rate": 2.1038068889975259e-07, | |
| "loss": 0.0339, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "grad_norm": 0.01202834608521387, | |
| "learning_rate": 2.0528000059645995e-07, | |
| "loss": 0.0001, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 898.7109375, | |
| "eval_kl": 0.0037212371826171875, | |
| "eval_loss": -0.011074024252593517, | |
| "eval_reward": 0.015625, | |
| "eval_reward_std": 0.042695626616477966, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.015625, | |
| "eval_runtime": 106.259, | |
| "eval_samples_per_second": 0.075, | |
| "eval_steps_per_second": 0.009, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 979.0859375, | |
| "epoch": 80.5, | |
| "grad_norm": 0.018630946038723055, | |
| "kl": 0.002685546875, | |
| "learning_rate": 2.0028431734436308e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 854.25, | |
| "epoch": 81.0, | |
| "grad_norm": 0.020703125475339885, | |
| "kl": 0.003864288330078125, | |
| "learning_rate": 1.9539516087697517e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 842.71875, | |
| "epoch": 81.5, | |
| "grad_norm": 0.028387308129338176, | |
| "kl": 0.004302978515625, | |
| "learning_rate": 1.9061402047871833e-07, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 834.75, | |
| "epoch": 82.0, | |
| "grad_norm": 0.01641578674675751, | |
| "kl": 0.0029449462890625, | |
| "learning_rate": 1.8594235253127372e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 842.40625, | |
| "epoch": 82.5, | |
| "grad_norm": 0.021721834190437855, | |
| "kl": 0.0041656494140625, | |
| "learning_rate": 1.8138158006995363e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 837.8125, | |
| "epoch": 83.0, | |
| "grad_norm": 0.01842609716407046, | |
| "kl": 0.003582000732421875, | |
| "learning_rate": 1.7693309235023127e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 837.453125, | |
| "epoch": 83.5, | |
| "grad_norm": 0.27337633068496925, | |
| "kl": 0.005832672119140625, | |
| "learning_rate": 1.7259824442455923e-07, | |
| "loss": 0.0532, | |
| "reward": 0.03125, | |
| "reward_std": 0.08539125323295593, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.03125, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 934.5625, | |
| "epoch": 84.0, | |
| "grad_norm": 0.012528179411346417, | |
| "kl": 0.0025787353515625, | |
| "learning_rate": 1.6837835672960831e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 906.21875, | |
| "epoch": 84.5, | |
| "grad_norm": 0.016944597423777116, | |
| "kl": 0.003490447998046875, | |
| "learning_rate": 1.6427471468404952e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "grad_norm": 0.397851893169485, | |
| "learning_rate": 1.6028856829700258e-07, | |
| "loss": -0.057, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 884.7109375, | |
| "eval_kl": 0.0037250518798828125, | |
| "eval_loss": 0.0001178277816507034, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 94.4429, | |
| "eval_samples_per_second": 0.085, | |
| "eval_steps_per_second": 0.011, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 889.3359375, | |
| "epoch": 85.5, | |
| "grad_norm": 0.0165553280450213, | |
| "kl": 0.0038013458251953125, | |
| "learning_rate": 1.5642113178727193e-07, | |
| "loss": 0.0001, | |
| "reward": 0.015625, | |
| "reward_std": 0.042695626616477966, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 931.34375, | |
| "epoch": 86.0, | |
| "grad_norm": 0.22070193047632167, | |
| "kl": 0.00353240966796875, | |
| "learning_rate": 1.5267358321348285e-07, | |
| "loss": 0.0258, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 902.5625, | |
| "epoch": 86.5, | |
| "grad_norm": 0.014212788161515211, | |
| "kl": 0.002574920654296875, | |
| "learning_rate": 1.4904706411523448e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 734.3125, | |
| "epoch": 87.0, | |
| "grad_norm": 0.5334088299783731, | |
| "kl": 0.005619049072265625, | |
| "learning_rate": 1.4554267916537495e-07, | |
| "loss": 0.0865, | |
| "reward": 0.046875, | |
| "reward_std": 0.1875, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.046875, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 846.8125, | |
| "epoch": 87.5, | |
| "grad_norm": 0.3350297851743498, | |
| "kl": 0.005451202392578125, | |
| "learning_rate": 1.4216149583350755e-07, | |
| "loss": 0.0788, | |
| "reward": 0.03125, | |
| "reward_std": 0.125, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.03125, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 896.90625, | |
| "epoch": 88.0, | |
| "grad_norm": 0.012136250169886781, | |
| "kl": 0.002605438232421875, | |
| "learning_rate": 1.3890454406082956e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 924.78125, | |
| "epoch": 88.5, | |
| "grad_norm": 0.020936369813142675, | |
| "kl": 0.004558563232421875, | |
| "learning_rate": 1.3577281594640182e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1008.28125, | |
| "epoch": 89.0, | |
| "grad_norm": 0.017396797408268067, | |
| "kl": 0.003875732421875, | |
| "learning_rate": 1.3276726544494571e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 972.84375, | |
| "epoch": 89.5, | |
| "grad_norm": 0.46133339595558576, | |
| "kl": 0.003589630126953125, | |
| "learning_rate": 1.2988880807625927e-07, | |
| "loss": -0.0749, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "grad_norm": 0.2749649880366191, | |
| "learning_rate": 1.2713832064634125e-07, | |
| "loss": 0.0315, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 928.6796875, | |
| "eval_kl": 0.0047626495361328125, | |
| "eval_loss": 0.06055343151092529, | |
| "eval_reward": 0.03125, | |
| "eval_reward_std": 0.10519562661647797, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.03125, | |
| "eval_runtime": 101.2345, | |
| "eval_samples_per_second": 0.079, | |
| "eval_steps_per_second": 0.01, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 887.515625, | |
| "epoch": 90.5, | |
| "grad_norm": 0.22634210714921138, | |
| "kl": 0.004863739013671875, | |
| "learning_rate": 1.2451664098030743e-07, | |
| "loss": 0.0395, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 975.1875, | |
| "epoch": 91.0, | |
| "grad_norm": 0.019843010782603646, | |
| "kl": 0.0040130615234375, | |
| "learning_rate": 1.220245676671809e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 936.265625, | |
| "epoch": 91.5, | |
| "grad_norm": 0.35206959012280886, | |
| "kl": 0.003910064697265625, | |
| "learning_rate": 1.1966285981663407e-07, | |
| "loss": 0.0049, | |
| "reward": 0.046875, | |
| "reward_std": 0.14789125323295593, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.046875, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 825.53125, | |
| "epoch": 92.0, | |
| "grad_norm": 0.3276867393127649, | |
| "kl": 0.0047760009765625, | |
| "learning_rate": 1.1743223682775649e-07, | |
| "loss": 0.0324, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 976.5, | |
| "epoch": 92.5, | |
| "grad_norm": 0.24825006369205177, | |
| "kl": 0.004093170166015625, | |
| "learning_rate": 1.1533337816991931e-07, | |
| "loss": 0.0156, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 803.0625, | |
| "epoch": 93.0, | |
| "grad_norm": 0.022316490692140336, | |
| "kl": 0.0040130615234375, | |
| "learning_rate": 1.1336692317580158e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 825.28125, | |
| "epoch": 93.5, | |
| "grad_norm": 0.4846328870102803, | |
| "kl": 0.00514984130859375, | |
| "learning_rate": 1.1153347084664419e-07, | |
| "loss": -0.0498, | |
| "reward": 0.03125, | |
| "reward_std": 0.125, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.03125, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1054.5, | |
| "epoch": 94.0, | |
| "grad_norm": 0.22552887394065263, | |
| "kl": 0.003173828125, | |
| "learning_rate": 1.0983357966978745e-07, | |
| "loss": 0.0259, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 896.578125, | |
| "epoch": 94.5, | |
| "grad_norm": 0.28343113199408254, | |
| "kl": 0.0047607421875, | |
| "learning_rate": 1.0826776744855121e-07, | |
| "loss": 0.0366, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "grad_norm": 0.018331093652178724, | |
| "learning_rate": 1.068365111445064e-07, | |
| "loss": 0.0001, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 926.5390625, | |
| "eval_kl": 0.004039764404296875, | |
| "eval_loss": 0.0001301074807997793, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0, | |
| "eval_runtime": 100.4368, | |
| "eval_samples_per_second": 0.08, | |
| "eval_steps_per_second": 0.01, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 902.8046875, | |
| "epoch": 95.5, | |
| "grad_norm": 0.27554893857164553, | |
| "kl": 0.0040225982666015625, | |
| "learning_rate": 1.0554024673218806e-07, | |
| "loss": 0.052, | |
| "reward": 0.0078125, | |
| "reward_std": 0.03125, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0078125, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 753.625, | |
| "epoch": 96.0, | |
| "grad_norm": 0.4490555309724536, | |
| "kl": 0.004604339599609375, | |
| "learning_rate": 1.0437936906629334e-07, | |
| "loss": -0.0745, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 829.953125, | |
| "epoch": 96.5, | |
| "grad_norm": 0.02546191399674691, | |
| "kl": 0.00514984130859375, | |
| "learning_rate": 1.0335423176140511e-07, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 852.78125, | |
| "epoch": 97.0, | |
| "grad_norm": 0.01515669426337304, | |
| "kl": 0.003253936767578125, | |
| "learning_rate": 1.0246514708427701e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 901.71875, | |
| "epoch": 97.5, | |
| "grad_norm": 0.01672130582287373, | |
| "kl": 0.003467559814453125, | |
| "learning_rate": 1.017123858587145e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 904.375, | |
| "epoch": 98.0, | |
| "grad_norm": 0.021287930078995584, | |
| "kl": 0.004909515380859375, | |
| "learning_rate": 1.0109617738307911e-07, | |
| "loss": 0.0002, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 970.984375, | |
| "epoch": 98.5, | |
| "grad_norm": 0.024329146341496674, | |
| "kl": 0.00463104248046875, | |
| "learning_rate": 1.0061670936044178e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 837.75, | |
| "epoch": 99.0, | |
| "grad_norm": 0.25418431822719284, | |
| "kl": 0.004810333251953125, | |
| "learning_rate": 1.002741278414069e-07, | |
| "loss": 0.0424, | |
| "reward": 0.015625, | |
| "reward_std": 0.0625, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.015625, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 894.921875, | |
| "epoch": 99.5, | |
| "grad_norm": 0.015548877308025595, | |
| "kl": 0.0036163330078125, | |
| "learning_rate": 1.0006853717962393e-07, | |
| "loss": 0.0001, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "grad_norm": 0.02199600718524651, | |
| "learning_rate": 1e-07, | |
| "loss": 0.0002, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 909.0390625, | |
| "eval_kl": 0.004390716552734375, | |
| "eval_loss": 0.02110510692000389, | |
| "eval_reward": 0.0078125, | |
| "eval_reward_std": 0.03125, | |
| "eval_rewards/accuracy_reward_staging": 0.0, | |
| "eval_rewards/format_reward": 0.0, | |
| "eval_rewards/format_reward_staging": 0.0078125, | |
| "eval_runtime": 101.442, | |
| "eval_samples_per_second": 0.079, | |
| "eval_steps_per_second": 0.01, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 893.78125, | |
| "epoch": 100.0, | |
| "kl": 0.004932403564453125, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward_staging": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "rewards/format_reward_staging": 0.0, | |
| "step": 200, | |
| "total_flos": 0.0, | |
| "train_loss": 0.007099090418751075, | |
| "train_runtime": 14240.9534, | |
| "train_samples_per_second": 0.056, | |
| "train_steps_per_second": 0.014 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 40, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |