| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.0074413989830088055, | |
| "eval_steps": 500, | |
| "global_step": 300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7188.5, | |
| "epoch": 2.480466327669602e-05, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.3333333333333334e-08, | |
| "loss": 0.0, | |
| "num_tokens": 29342.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4402.25, | |
| "epoch": 4.960932655339204e-05, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.666666666666667e-08, | |
| "loss": 0.0, | |
| "num_tokens": 47507.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4629.75, | |
| "epoch": 7.441398983008806e-05, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-07, | |
| "loss": 0.0, | |
| "num_tokens": 66606.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4487.75, | |
| "epoch": 9.921865310678408e-05, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3333333333333334e-07, | |
| "loss": 0.0, | |
| "num_tokens": 85313.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3864.5, | |
| "epoch": 0.00012402331638348009, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "loss": 0.0, | |
| "num_tokens": 101771.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 756.75, | |
| "epoch": 0.00014882797966017612, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2e-07, | |
| "loss": 0.0, | |
| "num_tokens": 105186.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 317.0, | |
| "epoch": 0.00017363264293687212, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.3333333333333333e-07, | |
| "loss": 0.0, | |
| "num_tokens": 107054.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 957.5, | |
| "epoch": 0.00019843730621356816, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.6666666666666667e-07, | |
| "loss": 0.0, | |
| "num_tokens": 111668.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3236.75, | |
| "epoch": 0.00022324196949026416, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3e-07, | |
| "loss": 0.0, | |
| "num_tokens": 125363.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4005.0, | |
| "epoch": 0.00024804663276696017, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": 0.0, | |
| "num_tokens": 141995.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7189.75, | |
| "epoch": 0.00027285129604365623, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.666666666666666e-07, | |
| "loss": 0.0, | |
| "num_tokens": 171334.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1106.75, | |
| "epoch": 0.00029765595932035224, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4e-07, | |
| "loss": 0.0, | |
| "num_tokens": 176401.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7617.0, | |
| "epoch": 0.00032246062259704824, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.3333333333333335e-07, | |
| "loss": 0.0, | |
| "num_tokens": 207361.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 674.25, | |
| "epoch": 0.00034726528587374425, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.6666666666666666e-07, | |
| "loss": 0.0, | |
| "num_tokens": 210422.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7167.25, | |
| "epoch": 0.0003720699491504403, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0, | |
| "num_tokens": 239887.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 756.25, | |
| "epoch": 0.0003968746124271363, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.333333333333333e-07, | |
| "loss": 0.0, | |
| "num_tokens": 243312.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4135.25, | |
| "epoch": 0.0004216792757038323, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.666666666666666e-07, | |
| "loss": 0.0, | |
| "num_tokens": 260569.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4373.0, | |
| "epoch": 0.00044648393898052833, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6e-07, | |
| "loss": 0.0, | |
| "num_tokens": 278469.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1157.5, | |
| "epoch": 0.00047128860225722433, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.333333333333332e-07, | |
| "loss": 0.0, | |
| "num_tokens": 283755.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 951.25, | |
| "epoch": 0.0004960932655339203, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 0.0, | |
| "num_tokens": 288076.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 724.5, | |
| "epoch": 0.0005208979288106164, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7e-07, | |
| "loss": 0.0, | |
| "num_tokens": 291390.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 657.25, | |
| "epoch": 0.0005457025920873125, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.333333333333332e-07, | |
| "loss": 0.0, | |
| "num_tokens": 294403.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 669.5, | |
| "epoch": 0.0005705072553640084, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.666666666666667e-07, | |
| "loss": 0.0, | |
| "num_tokens": 297541.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3391.0, | |
| "epoch": 0.0005953119186407045, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8e-07, | |
| "loss": 0.0, | |
| "num_tokens": 311721.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 881.75, | |
| "epoch": 0.0006201165819174004, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.333333333333333e-07, | |
| "loss": 0.0, | |
| "num_tokens": 315736.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 651.75, | |
| "epoch": 0.0006449212451940965, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.666666666666667e-07, | |
| "loss": 0.0, | |
| "num_tokens": 318767.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1553.0, | |
| "epoch": 0.0006697259084707925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9e-07, | |
| "loss": 0.0, | |
| "num_tokens": 325395.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4422.25, | |
| "epoch": 0.0006945305717474885, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.333333333333333e-07, | |
| "loss": 0.0, | |
| "num_tokens": 343972.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 834.75, | |
| "epoch": 0.0007193352350241846, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.666666666666666e-07, | |
| "loss": 0.0, | |
| "num_tokens": 347987.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4034.25, | |
| "epoch": 0.0007441398983008806, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 364944.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 789.25, | |
| "epoch": 0.0007689445615775766, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.99966154001881e-07, | |
| "loss": 0.0, | |
| "num_tokens": 368653.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6620.5, | |
| "epoch": 0.0007937492248542726, | |
| "grad_norm": 0.16441772878170013, | |
| "learning_rate": 9.998646205897307e-07, | |
| "loss": 0.178, | |
| "num_tokens": 395735.0, | |
| "reward": 0.25, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.25, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4088.25, | |
| "epoch": 0.0008185538881309686, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.996954135095478e-07, | |
| "loss": 0.0, | |
| "num_tokens": 413020.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6849.0, | |
| "epoch": 0.0008433585514076646, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.994585556692624e-07, | |
| "loss": 0.0, | |
| "num_tokens": 440984.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1422.75, | |
| "epoch": 0.0008681632146843607, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.991540791356342e-07, | |
| "loss": 0.0, | |
| "num_tokens": 447163.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 556.25, | |
| "epoch": 0.0008929678779610567, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.98782025129912e-07, | |
| "loss": 0.0, | |
| "num_tokens": 450580.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 721.75, | |
| "epoch": 0.0009177725412377527, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.983424440222529e-07, | |
| "loss": 0.0, | |
| "num_tokens": 453967.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2892.5, | |
| "epoch": 0.0009425772045144487, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.978353953249021e-07, | |
| "loss": 0.0, | |
| "num_tokens": 466049.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2498.25, | |
| "epoch": 0.0009673818677911447, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.972609476841365e-07, | |
| "loss": 0.0, | |
| "num_tokens": 476494.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5306.25, | |
| "epoch": 0.0009921865310678407, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.966191788709714e-07, | |
| "loss": 0.0, | |
| "num_tokens": 498639.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2374.25, | |
| "epoch": 0.0010169911943445368, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.959101757706308e-07, | |
| "loss": 0.0, | |
| "num_tokens": 508772.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1571.75, | |
| "epoch": 0.0010417958576212328, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.95134034370785e-07, | |
| "loss": 0.0, | |
| "num_tokens": 515583.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3794.5, | |
| "epoch": 0.0010666005208979288, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.942908597485558e-07, | |
| "loss": 0.0, | |
| "num_tokens": 531425.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2603.25, | |
| "epoch": 0.001091405184174625, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.933807660562896e-07, | |
| "loss": 0.0, | |
| "num_tokens": 542478.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 825.5, | |
| "epoch": 0.0011162098474513209, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.92403876506104e-07, | |
| "loss": 0.0, | |
| "num_tokens": 546324.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.0011410145107280168, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.913603233532067e-07, | |
| "loss": 0.0, | |
| "num_tokens": 579840.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3327.0, | |
| "epoch": 0.0011658191740047128, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.902502478779896e-07, | |
| "loss": 0.0, | |
| "num_tokens": 593964.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2645.75, | |
| "epoch": 0.001190623837281409, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.890738003669027e-07, | |
| "loss": 0.0, | |
| "num_tokens": 605147.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2478.75, | |
| "epoch": 0.001215428500558105, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.878311400921072e-07, | |
| "loss": 0.0, | |
| "num_tokens": 615638.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6159.0, | |
| "epoch": 0.0012402331638348009, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.865224352899118e-07, | |
| "loss": 0.0, | |
| "num_tokens": 640798.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3449.25, | |
| "epoch": 0.001265037827111497, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.851478631379982e-07, | |
| "loss": 0.0, | |
| "num_tokens": 655443.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7229.75, | |
| "epoch": 0.001289842490388193, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.837076097314318e-07, | |
| "loss": 0.0, | |
| "num_tokens": 685146.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3381.75, | |
| "epoch": 0.001314647153664889, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.822018700574694e-07, | |
| "loss": 0.0, | |
| "num_tokens": 699265.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6892.0, | |
| "epoch": 0.001339451816941585, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.806308479691594e-07, | |
| "loss": 0.0, | |
| "num_tokens": 727501.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5287.75, | |
| "epoch": 0.001364256480218281, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.789947561577443e-07, | |
| "loss": 0.0, | |
| "num_tokens": 749364.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5510.75, | |
| "epoch": 0.001389061143494977, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.77293816123866e-07, | |
| "loss": 0.0, | |
| "num_tokens": 771947.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1866.25, | |
| "epoch": 0.0014138658067716732, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.755282581475767e-07, | |
| "loss": 0.0, | |
| "num_tokens": 780588.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7529.75, | |
| "epoch": 0.0014386704700483691, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.736983212571645e-07, | |
| "loss": 0.0, | |
| "num_tokens": 811727.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.001463475133325065, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.718042531967916e-07, | |
| "loss": 0.0, | |
| "num_tokens": 844919.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4105.0, | |
| "epoch": 0.0014882797966017612, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.698463103929541e-07, | |
| "loss": 0.0, | |
| "num_tokens": 861815.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2881.25, | |
| "epoch": 0.0015130844598784572, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.678247579197658e-07, | |
| "loss": 0.0, | |
| "num_tokens": 873944.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2342.5, | |
| "epoch": 0.0015378891231551531, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.657398694630712e-07, | |
| "loss": 0.0, | |
| "num_tokens": 884086.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7138.75, | |
| "epoch": 0.001562693786431849, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.635919272833937e-07, | |
| "loss": 0.0, | |
| "num_tokens": 913165.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7349.25, | |
| "epoch": 0.0015874984497085453, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.613812221777212e-07, | |
| "loss": 0.0, | |
| "num_tokens": 943130.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3842.5, | |
| "epoch": 0.0016123031129852412, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.591080534401371e-07, | |
| "loss": 0.0, | |
| "num_tokens": 959160.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1762.5, | |
| "epoch": 0.0016371077762619372, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.567727288213004e-07, | |
| "loss": 0.0, | |
| "num_tokens": 966626.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5473.25, | |
| "epoch": 0.0016619124395386333, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.543755644867822e-07, | |
| "loss": 0.0, | |
| "num_tokens": 989395.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5421.75, | |
| "epoch": 0.0016867171028153293, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.519168849742602e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1011750.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5362.0, | |
| "epoch": 0.0017115217660920252, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.493970231495834e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1033958.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1578.0, | |
| "epoch": 0.0017363264293687214, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.468163201617061e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1040734.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3258.5, | |
| "epoch": 0.0017611310926454174, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.44175125396502e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1054364.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1447.0, | |
| "epoch": 0.0017859357559221133, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.414737964294634e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1061444.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4742.25, | |
| "epoch": 0.0018107404191988095, | |
| "grad_norm": 0.26786288619041443, | |
| "learning_rate": 9.387126989772909e-07, | |
| "loss": -0.0049, | |
| "num_tokens": 1080881.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5773502588272095, | |
| "rewards/accuracy_reward": 0.5, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3059.0, | |
| "epoch": 0.0018355450824755054, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.358922068483811e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1093829.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2770.5, | |
| "epoch": 0.0018603497457522014, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.330127018922193e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1105535.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.0018851544090288973, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.300745739476828e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1139031.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6549.5, | |
| "epoch": 0.0019099590723055935, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.270782207902627e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1166325.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 903.25, | |
| "epoch": 0.0019347637355822895, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.240240480782129e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1170538.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2268.0, | |
| "epoch": 0.0019595683988589854, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.209124692976287e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1180026.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4045.25, | |
| "epoch": 0.0019843730621356814, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.177439057064682e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1196683.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4910.75, | |
| "epoch": 0.0020091777254123773, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.145187862775208e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1216882.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 441.75, | |
| "epoch": 0.0020339823886890737, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.112375476403311e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1219041.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6115.75, | |
| "epoch": 0.0020587870519657697, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.079006340220861e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1244036.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 851.0, | |
| "epoch": 0.0020835917152424656, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.045084971874737e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1248104.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1183.5, | |
| "epoch": 0.0021083963785191616, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.010615963775219e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1253298.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2500.75, | |
| "epoch": 0.0021332010417958575, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.975603982474238e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1263869.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6133.0, | |
| "epoch": 0.0021580057050725535, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.940053768033608e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1289193.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1037.25, | |
| "epoch": 0.00218281036834925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.903970133383296e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1293746.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7457.75, | |
| "epoch": 0.002207615031625946, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.86735796366982e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1324261.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 614.75, | |
| "epoch": 0.0022324196949026417, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.83022221559489e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1327344.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1136.25, | |
| "epoch": 0.0022572243581793377, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.792567916744345e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1332425.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3865.75, | |
| "epoch": 0.0022820290214560337, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.754400164907496e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1348472.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7174.25, | |
| "epoch": 0.0023068336847327296, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.71572412738697e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1377825.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4552.75, | |
| "epoch": 0.0023316383480094256, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.676545040299143e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1396588.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4767.25, | |
| "epoch": 0.002356443011286122, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.636868207865243e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1416321.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1088.75, | |
| "epoch": 0.002381247674562818, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.596699001693255e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1421132.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.002406052337839514, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.556042860050685e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1454580.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3769.75, | |
| "epoch": 0.00243085700111621, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.514905287128309e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1470555.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 560.75, | |
| "epoch": 0.0024556616643929058, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.473291852294986e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1473302.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7399.75, | |
| "epoch": 0.0024804663276696017, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.431208189343669e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1503753.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5177.75, | |
| "epoch": 0.002505270990946298, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.388659995728662e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1524904.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5838.5, | |
| "epoch": 0.002530075654222994, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.34565303179429e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1548710.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3594.5, | |
| "epoch": 0.00255488031749969, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.302193119995038e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1563572.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6636.5, | |
| "epoch": 0.002579684980776386, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.258286144107276e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1590698.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3181.25, | |
| "epoch": 0.002604489644053082, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.213938048432696e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1603955.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4676.0, | |
| "epoch": 0.002629294307329778, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.16915483699355e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1623231.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 694.75, | |
| "epoch": 0.0026540989706064742, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.123942572719799e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1626562.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1234.75, | |
| "epoch": 0.00267890363388317, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.07830737662829e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1631929.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2437.0, | |
| "epoch": 0.002703708297159866, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.032255426994068e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1642117.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4050.75, | |
| "epoch": 0.002728512960436562, | |
| "grad_norm": 0.2529246211051941, | |
| "learning_rate": 7.985792958513931e-07, | |
| "loss": -0.0316, | |
| "num_tokens": 1658812.0, | |
| "reward": 0.25, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.25, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2922.25, | |
| "epoch": 0.002753317623713258, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.938926261462365e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1671017.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5514.75, | |
| "epoch": 0.002778122286989954, | |
| "grad_norm": 0.2133975625038147, | |
| "learning_rate": 7.891661680839932e-07, | |
| "loss": 0.1083, | |
| "num_tokens": 1694024.0, | |
| "reward": 0.25, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.25, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1548.25, | |
| "epoch": 0.00280292695026665, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.844005615514258e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1700829.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4555.25, | |
| "epoch": 0.0028277316135433463, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.795964517353733e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1719742.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 549.25, | |
| "epoch": 0.0028525362768200423, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.74754489035403e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1722415.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6851.0, | |
| "epoch": 0.0028773409400967382, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.698753289757564e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1750299.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6247.5, | |
| "epoch": 0.002902145603373434, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.649596321166024e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1776377.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6994.5, | |
| "epoch": 0.00292695026665013, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.600080639646076e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1805067.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5748.25, | |
| "epoch": 0.002951754929926826, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.550212948828376e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1828768.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 704.25, | |
| "epoch": 0.0029765595932035225, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.5e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1831957.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2971.25, | |
| "epoch": 0.0030013642564802184, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.449448591190434e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1844478.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1162.5, | |
| "epoch": 0.0030261689197569144, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.398565566251232e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1850380.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 653.75, | |
| "epoch": 0.0030509735830336103, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.347357813929454e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1853379.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.0030757782463103063, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.295832266935058e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1887027.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 867.25, | |
| "epoch": 0.0031005829095870022, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.243995901002311e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1890888.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2747.25, | |
| "epoch": 0.003125387572863698, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.191855733945386e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1902673.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 869.75, | |
| "epoch": 0.0031501922361403946, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.139418824708271e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1906624.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 826.5, | |
| "epoch": 0.0031749968994170905, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.086692272409089e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1910582.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5324.75, | |
| "epoch": 0.0031998015626937865, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.033683215379002e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1932469.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1146.75, | |
| "epoch": 0.0032246062259704824, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.980398830195784e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1937920.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 595.0, | |
| "epoch": 0.0032494108892471784, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.926846330712241e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1940816.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2312.5, | |
| "epoch": 0.0032742155525238743, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.87303296707956e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1951390.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1446.5, | |
| "epoch": 0.0032990202158005707, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.818966024765757e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1957664.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1437.0, | |
| "epoch": 0.0033238248790772667, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.764652823569343e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1963868.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3721.25, | |
| "epoch": 0.0033486295423539626, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.710100716628344e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1979441.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2822.25, | |
| "epoch": 0.0033734342056306586, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.65531708942479e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1991198.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7604.75, | |
| "epoch": 0.0033982388689073545, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.600309358784857e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2022213.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1285.75, | |
| "epoch": 0.0034230435321840505, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.545084971874736e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2027864.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2961.0, | |
| "epoch": 0.0034478481954607464, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.489651405192409e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2040244.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3632.5, | |
| "epoch": 0.003472652858737443, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.434016163555451e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2055550.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1019.0, | |
| "epoch": 0.0034974575220141388, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.378186779084995e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2060194.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.0035222621852908347, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.322170810186011e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2093590.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 680.75, | |
| "epoch": 0.0035470668485675307, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.265975840524009e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2096849.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3868.5, | |
| "epoch": 0.0035718715118442266, | |
| "grad_norm": 0.3706275522708893, | |
| "learning_rate": 6.209609477998338e-07, | |
| "loss": 0.1572, | |
| "num_tokens": 2112955.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5773502588272095, | |
| "rewards/accuracy_reward": 0.5, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2470.5, | |
| "epoch": 0.0035966761751209226, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.153079353712201e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2123349.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1336.25, | |
| "epoch": 0.003621480838397619, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.096393120939516e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2129378.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3586.0, | |
| "epoch": 0.003646285501674315, | |
| "grad_norm": 0.2838553190231323, | |
| "learning_rate": 6.039558454088795e-07, | |
| "loss": -0.2103, | |
| "num_tokens": 2144430.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5773502588272095, | |
| "rewards/accuracy_reward": 0.5, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1188.0, | |
| "epoch": 0.003671090164951011, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.98258304766415e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2149782.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 516.0, | |
| "epoch": 0.003695894828227707, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.925474615223572e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2152310.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2670.25, | |
| "epoch": 0.0037206994915044028, | |
| "grad_norm": 0.2669675350189209, | |
| "learning_rate": 5.868240888334652e-07, | |
| "loss": -0.0001, | |
| "num_tokens": 2163647.0, | |
| "reward": 0.75, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.75, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.0037455041547810987, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.810889615527838e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2196923.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1642.25, | |
| "epoch": 0.0037703088180577947, | |
| "grad_norm": 0.3528982996940613, | |
| "learning_rate": 5.753428561247415e-07, | |
| "loss": -0.1153, | |
| "num_tokens": 2204116.0, | |
| "reward": 0.75, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.75, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1804.0, | |
| "epoch": 0.003795113481334491, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.695865504800327e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2212232.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 456.75, | |
| "epoch": 0.003819918144611187, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.638208239302974e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2214579.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5440.5, | |
| "epoch": 0.003844722807887883, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.580464570626151e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2236929.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1472.0, | |
| "epoch": 0.003869527471164579, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.522642316338268e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2243433.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1144.0, | |
| "epoch": 0.003894332134441275, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.464749304646961e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2248525.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2829.5, | |
| "epoch": 0.003919136797717971, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.406793373339292e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2260291.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7811.0, | |
| "epoch": 0.003943941460994667, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.348782368720625e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2292039.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3727.5, | |
| "epoch": 0.003968746124271363, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.290724144552379e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2307989.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3684.75, | |
| "epoch": 0.003993550787548059, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.232626560988734e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2323668.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6380.25, | |
| "epoch": 0.004018355450824755, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.174497483512505e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2350017.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7492.25, | |
| "epoch": 0.004043160114101451, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.116344781870281e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2380890.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4281.25, | |
| "epoch": 0.004067964777378147, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.058176329006985e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2398691.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5504.25, | |
| "epoch": 0.004092769440654843, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2421208.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2762.0, | |
| "epoch": 0.004117574103931539, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.941823670993015e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2432736.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3381.0, | |
| "epoch": 0.004142378767208235, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.883655218129719e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2446848.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3178.5, | |
| "epoch": 0.004167183430484931, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.825502516487496e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2460906.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2739.0, | |
| "epoch": 0.004191988093761628, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.7673734390112666e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2472598.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7138.25, | |
| "epoch": 0.004216792757038323, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.7092758554476206e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2501723.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7294.75, | |
| "epoch": 0.0042415974203150195, | |
| "grad_norm": 0.17870603501796722, | |
| "learning_rate": 4.6512176312793735e-07, | |
| "loss": -0.018, | |
| "num_tokens": 2531502.0, | |
| "reward": 0.25, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.25, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5977.25, | |
| "epoch": 0.004266402083591715, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.593206626660709e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2555995.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5944.5, | |
| "epoch": 0.004291206746868411, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.535250695353039e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2580333.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 974.75, | |
| "epoch": 0.004316011410145107, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.477357683661733e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2584660.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4904.25, | |
| "epoch": 0.004340816073421803, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.419535429373848e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2605065.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6226.0, | |
| "epoch": 0.0043656207366985, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.3617917606970267e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2630729.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 743.5, | |
| "epoch": 0.004390425399975195, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.304134495199674e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2634155.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1183.5, | |
| "epoch": 0.004415230063251892, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.246571438752584e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2639329.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6240.75, | |
| "epoch": 0.004440034726528587, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.1891103844721634e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2664792.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1472.0, | |
| "epoch": 0.0044648393898052835, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.131759111665348e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2671380.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3143.5, | |
| "epoch": 0.004489644053081979, | |
| "grad_norm": 0.2921382188796997, | |
| "learning_rate": 4.074525384776428e-07, | |
| "loss": 0.0159, | |
| "num_tokens": 2684494.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5773502588272095, | |
| "rewards/accuracy_reward": 0.5, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7197.5, | |
| "epoch": 0.004514448716358675, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.0174169523358485e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2713816.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2604.0, | |
| "epoch": 0.004539253379635372, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.960441545911204e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2725404.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5705.5, | |
| "epoch": 0.004564058042912067, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.9036068790604823e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2748802.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6358.75, | |
| "epoch": 0.004588862706188764, | |
| "grad_norm": 0.16501788794994354, | |
| "learning_rate": 3.846920646287799e-07, | |
| "loss": 0.0842, | |
| "num_tokens": 2774653.0, | |
| "reward": 0.25, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.25, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3738.0, | |
| "epoch": 0.004613667369465459, | |
| "grad_norm": 0.3154926300048828, | |
| "learning_rate": 3.790390522001662e-07, | |
| "loss": 0.0946, | |
| "num_tokens": 2790353.0, | |
| "reward": 0.75, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.75, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3408.25, | |
| "epoch": 0.004638472032742156, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.734024159475991e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2804462.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 850.0, | |
| "epoch": 0.004663276696018851, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.6778291898139903e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2808434.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2068.0, | |
| "epoch": 0.0046880813592955475, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.621813220915004e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2817286.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1111.0, | |
| "epoch": 0.004712886022572244, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.56598383644455e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2822274.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8170.5, | |
| "epoch": 0.004737690685848939, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.5103485948075894e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2855644.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7154.75, | |
| "epoch": 0.004762495349125636, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.454915028125263e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2884907.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4736.75, | |
| "epoch": 0.004787300012402331, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.3996906412151417e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2904514.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 593.0, | |
| "epoch": 0.004812104675679028, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.34468291057521e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2907338.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4472.75, | |
| "epoch": 0.004836909338955724, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.2898992833716563e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2925641.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 590.0, | |
| "epoch": 0.00486171400223242, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.235347176430656e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2928537.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2825.25, | |
| "epoch": 0.004886518665509116, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.181033975234244e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2940418.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6512.0, | |
| "epoch": 0.0049113233287858115, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.1269670329204393e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2967054.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 608.5, | |
| "epoch": 0.004936127992062508, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.073153669287759e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2969848.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3488.25, | |
| "epoch": 0.004960932655339203, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.0196011698042156e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2984313.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 647.0, | |
| "epoch": 0.0049857373186159, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.9663167846209996e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2987433.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 201 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1120.5, | |
| "epoch": 0.005010541981892596, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.9133077275909107e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2992407.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 202 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1203.75, | |
| "epoch": 0.005035346645169292, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.86058117529173e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2997654.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 203 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 324.75, | |
| "epoch": 0.005060151308445988, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.808144266054612e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2999325.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 204 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7551.25, | |
| "epoch": 0.005084955971722684, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.756004098997689e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3029986.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4129.0, | |
| "epoch": 0.00510976063499938, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.7041677330649406e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3047106.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1562.5, | |
| "epoch": 0.0051345652982760755, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.6526421860705473e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3053824.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 207 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8058.25, | |
| "epoch": 0.005159369961552772, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.6014344337487703e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3086921.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6682.0, | |
| "epoch": 0.005184174624829468, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.550551408809565e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3114257.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 209 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1586.25, | |
| "epoch": 0.005208979288106164, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.500000000000001e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3121206.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4050.5, | |
| "epoch": 0.00523378395138286, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.4497870511716237e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3138020.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 211 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1723.0, | |
| "epoch": 0.005258588614659556, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.399919360353923e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3145332.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 212 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3516.75, | |
| "epoch": 0.005283393277936252, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.350403678833976e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3160275.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 213 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3455.0, | |
| "epoch": 0.0053081979412129485, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.301246710242437e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3175927.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 214 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5161.5, | |
| "epoch": 0.005333002604489644, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.25245510964597e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3197301.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3201.25, | |
| "epoch": 0.00535780726776634, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.2040354826462664e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3210914.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 216 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4491.75, | |
| "epoch": 0.005382611931043036, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.155994384485742e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3229381.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2169.0, | |
| "epoch": 0.005407416594319732, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.1083383191600672e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3238517.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 218 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3591.0, | |
| "epoch": 0.005432221257596428, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.0610737385376348e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3253581.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2297.5, | |
| "epoch": 0.005457025920873124, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.01420704148607e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3263263.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5212.25, | |
| "epoch": 0.0054818305841498206, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9677445730059344e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3284604.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 221 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 706.25, | |
| "epoch": 0.005506635247426516, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9216926233717084e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3287817.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 545.5, | |
| "epoch": 0.0055314399107032125, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8760574272801998e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3290371.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 223 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2282.0, | |
| "epoch": 0.005556244573979908, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.830845163006448e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3300159.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 825.75, | |
| "epoch": 0.005581049237256604, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7860619515673032e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3303922.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.0056058539005333, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.741713855892724e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3337458.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 226 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1880.75, | |
| "epoch": 0.005630658563809996, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.697806880004962e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3345573.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5774.0, | |
| "epoch": 0.005655463227086693, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6543469682057104e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3369561.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 228 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1106.0, | |
| "epoch": 0.005680267890363388, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6113400042713388e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3374385.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 229 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 625.0, | |
| "epoch": 0.005705072553640085, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5687918106563325e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3377409.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 991.75, | |
| "epoch": 0.00572987721691678, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5267081477050131e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3382456.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 231 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.0057546818801934765, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4850947128716911e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3416072.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 232 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3301.25, | |
| "epoch": 0.005779486543470172, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4439571399493145e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3430053.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 233 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2911.5, | |
| "epoch": 0.005804291206746868, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4033009983067452e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3442327.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 234 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3780.0, | |
| "epoch": 0.005829095870023565, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3631317921347562e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3458067.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3959.25, | |
| "epoch": 0.00585390053330026, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3234549597008572e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3475240.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 236 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1576.75, | |
| "epoch": 0.005878705196576957, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.284275872613028e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3482159.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 237 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2267.5, | |
| "epoch": 0.005903509859853652, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.245599835092504e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3491753.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 238 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6582.25, | |
| "epoch": 0.005928314523130349, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2074320832556556e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3518766.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 239 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4409.5, | |
| "epoch": 0.005953119186407045, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1697777844051104e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3537124.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 744.75, | |
| "epoch": 0.0059779238496837405, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1326420363301808e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3540559.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 241 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1890.0, | |
| "epoch": 0.006002728512960437, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.096029866616704e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3549251.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 242 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1161.75, | |
| "epoch": 0.006027533176237132, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0599462319663904e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3554662.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 243 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2727.0, | |
| "epoch": 0.006052337839513829, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0243960175257605e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3566014.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 244 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1661.25, | |
| "epoch": 0.006077142502790524, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.893840362247807e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3574155.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3048.5, | |
| "epoch": 0.006101947166067221, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.549150281252632e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3586801.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 246 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4632.0, | |
| "epoch": 0.006126751829343917, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.209936597791407e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3606109.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 247 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1765.5, | |
| "epoch": 0.006151556492620613, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.876245235966883e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3613815.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 248 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4083.75, | |
| "epoch": 0.006176361155897309, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.548121372247919e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3630678.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 249 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4663.75, | |
| "epoch": 0.0062011658191740045, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.225609429353186e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3649817.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2062.0, | |
| "epoch": 0.006225970482450701, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.908753070237124e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3658493.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 251 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1659.25, | |
| "epoch": 0.006250775145727396, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.597595192178702e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3665746.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 252 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1894.0, | |
| "epoch": 0.006275579809004093, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.292177920973724e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3673830.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 253 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5424.25, | |
| "epoch": 0.006300384472280789, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.992542605231738e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3695967.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 254 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 498.25, | |
| "epoch": 0.006325189135557485, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.698729810778064e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3698604.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.006349993798834181, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.410779315161886e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3732176.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 256 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3048.0, | |
| "epoch": 0.006374798462110877, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.128730102270896e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3745092.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 257 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2695.25, | |
| "epoch": 0.006399603125387573, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.8526203570536504e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3756593.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 258 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 845.75, | |
| "epoch": 0.0064244077886642685, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.582487460349805e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3760404.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 259 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2017.75, | |
| "epoch": 0.006449212451940965, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.318367983829392e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3769003.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8191.5, | |
| "epoch": 0.006474017115217661, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.060297685041659e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3802305.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 261 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1256.5, | |
| "epoch": 0.006498821778494357, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.808311502573975e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3808003.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 262 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7104.5, | |
| "epoch": 0.006523626441771053, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.5624435513217873e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3837061.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 263 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3343.25, | |
| "epoch": 0.006548431105047749, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.322727117869951e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3851114.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 264 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5981.75, | |
| "epoch": 0.006573235768324445, | |
| "grad_norm": 0.20941807329654694, | |
| "learning_rate": 4.089194655986306e-08, | |
| "loss": 0.1413, | |
| "num_tokens": 3875585.0, | |
| "reward": 0.25, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.25, | |
| "step": 265 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2008.5, | |
| "epoch": 0.0065980404316011414, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.861877782227885e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3884311.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 266 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6928.75, | |
| "epoch": 0.006622845094877837, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.6408072716606345e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3912594.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 267 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 4809.0, | |
| "epoch": 0.006647649758154533, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.426013053692878e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3932422.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 268 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6333.5, | |
| "epoch": 0.006672454421431229, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.217524208023431e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3958284.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 269 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2418.75, | |
| "epoch": 0.006697259084707925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.015368960704584e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3968403.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6057.25, | |
| "epoch": 0.006722063747984621, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.8195746803208242e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3993200.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 271 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 821.5, | |
| "epoch": 0.006746868411261317, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.6301678742835397e-08, | |
| "loss": 0.0, | |
| "num_tokens": 3996950.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 272 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.0067716730745380135, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.4471741852423233e-08, | |
| "loss": 0.0, | |
| "num_tokens": 4030314.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 273 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2693.5, | |
| "epoch": 0.006796477737814709, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.2706183876134045e-08, | |
| "loss": 0.0, | |
| "num_tokens": 4041892.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 274 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 965.25, | |
| "epoch": 0.0068212824010914054, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.100524384225555e-08, | |
| "loss": 0.0, | |
| "num_tokens": 4046237.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 275 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3427.25, | |
| "epoch": 0.006846087064368101, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.936915203084055e-08, | |
| "loss": 0.0, | |
| "num_tokens": 4060802.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 276 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 7794.75, | |
| "epoch": 0.006870891727644797, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7798129942530548e-08, | |
| "loss": 0.0, | |
| "num_tokens": 4092813.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 277 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6520.0, | |
| "epoch": 0.006895696390921493, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.62923902685681e-08, | |
| "loss": 0.0, | |
| "num_tokens": 4119641.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 278 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1263.0, | |
| "epoch": 0.006920501054198189, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4852136862001763e-08, | |
| "loss": 0.0, | |
| "num_tokens": 4125333.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 279 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1784.0, | |
| "epoch": 0.006945305717474886, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3477564710088096e-08, | |
| "loss": 0.0, | |
| "num_tokens": 4132957.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 5096.0, | |
| "epoch": 0.006970110380751581, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.2168859907892902e-08, | |
| "loss": 0.0, | |
| "num_tokens": 4154117.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 281 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1427.0, | |
| "epoch": 0.0069949150440282775, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0926199633097154e-08, | |
| "loss": 0.0, | |
| "num_tokens": 4160381.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 282 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 862.25, | |
| "epoch": 0.007019719707304973, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.749752122010346e-09, | |
| "loss": 0.0, | |
| "num_tokens": 4164318.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 283 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6548.25, | |
| "epoch": 0.0070445243705816694, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.639676646793382e-09, | |
| "loss": 0.0, | |
| "num_tokens": 4191083.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 284 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1828.5, | |
| "epoch": 0.007069329033858365, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.59612349389599e-09, | |
| "loss": 0.0, | |
| "num_tokens": 4198809.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 285 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1426.0, | |
| "epoch": 0.007094133697135061, | |
| "grad_norm": 0.405701607465744, | |
| "learning_rate": 6.61923394371039e-09, | |
| "loss": 0.0284, | |
| "num_tokens": 4204961.0, | |
| "reward": 0.75, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.75, | |
| "step": 286 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6286.25, | |
| "epoch": 0.007118938360411758, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.7091402514442e-09, | |
| "loss": 0.0, | |
| "num_tokens": 4230786.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "step": 287 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1944.25, | |
| "epoch": 0.007143743023688453, | |
| "grad_norm": 0.5905258655548096, | |
| "learning_rate": 4.865965629214819e-09, | |
| "loss": -0.042, | |
| "num_tokens": 4239071.0, | |
| "reward": 0.25, | |
| "reward_std": 0.5, | |
| "rewards/accuracy_reward": 0.25, | |
| "step": 288 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 860.5, | |
| "epoch": 0.00716854768696515, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.089824229369154e-09, | |
| "loss": 0.0, | |
| "num_tokens": 4243021.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 289 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 669.5, | |
| "epoch": 0.007193352350241845, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.380821129028488e-09, | |
| "loss": 0.0, | |
| "num_tokens": 4246303.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1785.75, | |
| "epoch": 0.0072181570135185415, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.739052315863355e-09, | |
| "loss": 0.0, | |
| "num_tokens": 4254082.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 291 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 3101.5, | |
| "epoch": 0.007242961676795238, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.1646046750978253e-09, | |
| "loss": 0.0, | |
| "num_tokens": 4266976.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 292 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 608.25, | |
| "epoch": 0.0072677663400719334, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6575559777469717e-09, | |
| "loss": 0.0, | |
| "num_tokens": 4269801.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 293 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 6663.0, | |
| "epoch": 0.00729257100334863, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.217974870087901e-09, | |
| "loss": 0.0, | |
| "num_tokens": 4297549.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 294 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 803.75, | |
| "epoch": 0.007317375666625325, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.459208643659121e-10, | |
| "loss": 0.0, | |
| "num_tokens": 4301396.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 295 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1361.5, | |
| "epoch": 0.007342180329902022, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.41444330737717e-10, | |
| "loss": 0.0, | |
| "num_tokens": 4307358.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 296 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 532.5, | |
| "epoch": 0.007366984993178717, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.0458649045211894e-10, | |
| "loss": 0.0, | |
| "num_tokens": 4309896.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 297 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1515.5, | |
| "epoch": 0.007391789656455414, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3537941026914302e-10, | |
| "loss": 0.0, | |
| "num_tokens": 4316658.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 298 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 8192.0, | |
| "epoch": 0.00741659431973211, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.3845998118897657e-11, | |
| "loss": 0.0, | |
| "num_tokens": 4350074.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 299 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 2304.25, | |
| "epoch": 0.0074413989830088055, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "num_tokens": 4359795.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 1.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0074413989830088055, | |
| "step": 300, | |
| "total_flos": 0.0, | |
| "train_loss": 0.001285893005115213, | |
| "train_runtime": 7938.9492, | |
| "train_samples_per_second": 0.151, | |
| "train_steps_per_second": 0.038 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |