| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 255, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 378.0, | |
| "completions/max_terminated_length": 378.0, | |
| "completions/mean_length": 116.875, | |
| "completions/mean_terminated_length": 116.875, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.3960496634244919, | |
| "epoch": 0.011764705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3658151626586914, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 120704.0, | |
| "reward": 0.42291906476020813, | |
| "reward_std": 0.353160560131073, | |
| "rewards/rna_reward_fn/mean": 0.42291906476020813, | |
| "rewards/rna_reward_fn/std": 0.39480823278427124, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 145.34375, | |
| "completions/mean_terminated_length": 145.34375, | |
| "completions/min_length": 32.0, | |
| "completions/min_terminated_length": 32.0, | |
| "entropy": 0.3918581157922745, | |
| "epoch": 0.023529411764705882, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3561055362224579, | |
| "learning_rate": 9.96078431372549e-07, | |
| "loss": 0.0, | |
| "num_tokens": 270560.0, | |
| "reward": 0.4679465889930725, | |
| "reward_std": 0.304127037525177, | |
| "rewards/rna_reward_fn/mean": 0.4679465889930725, | |
| "rewards/rna_reward_fn/std": 0.37357842922210693, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 169.4375, | |
| "completions/mean_terminated_length": 169.4375, | |
| "completions/min_length": 16.0, | |
| "completions/min_terminated_length": 16.0, | |
| "entropy": 0.3528731167316437, | |
| "epoch": 0.03529411764705882, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3573973476886749, | |
| "learning_rate": 9.92156862745098e-07, | |
| "loss": 0.0, | |
| "num_tokens": 445088.0, | |
| "reward": 0.4688035249710083, | |
| "reward_std": 0.3215726613998413, | |
| "rewards/rna_reward_fn/mean": 0.4688035249710083, | |
| "rewards/rna_reward_fn/std": 0.3945569097995758, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 164.53125, | |
| "completions/mean_terminated_length": 164.53125, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.3565346747636795, | |
| "epoch": 0.047058823529411764, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.37075310945510864, | |
| "learning_rate": 9.88235294117647e-07, | |
| "loss": -0.0, | |
| "num_tokens": 614592.0, | |
| "reward": 0.5333437323570251, | |
| "reward_std": 0.3202625513076782, | |
| "rewards/rna_reward_fn/mean": 0.5333437323570251, | |
| "rewards/rna_reward_fn/std": 0.3746815025806427, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 400.0, | |
| "completions/max_terminated_length": 400.0, | |
| "completions/mean_length": 103.3125, | |
| "completions/mean_terminated_length": 103.3125, | |
| "completions/min_length": 20.0, | |
| "completions/min_terminated_length": 20.0, | |
| "entropy": 0.35146908462047577, | |
| "epoch": 0.058823529411764705, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.34449008107185364, | |
| "learning_rate": 9.84313725490196e-07, | |
| "loss": -0.0, | |
| "num_tokens": 721408.0, | |
| "reward": 0.5266900062561035, | |
| "reward_std": 0.32159364223480225, | |
| "rewards/rna_reward_fn/mean": 0.5266900062561035, | |
| "rewards/rna_reward_fn/std": 0.3701845705509186, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 161.25, | |
| "completions/mean_terminated_length": 161.25, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.3309106081724167, | |
| "epoch": 0.07058823529411765, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.35763484239578247, | |
| "learning_rate": 9.80392156862745e-07, | |
| "loss": -0.0, | |
| "num_tokens": 887552.0, | |
| "reward": 0.5357265472412109, | |
| "reward_std": 0.2797412872314453, | |
| "rewards/rna_reward_fn/mean": 0.5357265472412109, | |
| "rewards/rna_reward_fn/std": 0.3577335476875305, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 151.375, | |
| "completions/mean_terminated_length": 151.375, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.34717176854610443, | |
| "epoch": 0.08235294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3663802146911621, | |
| "learning_rate": 9.76470588235294e-07, | |
| "loss": -0.0, | |
| "num_tokens": 1043584.0, | |
| "reward": 0.547458291053772, | |
| "reward_std": 0.2995288372039795, | |
| "rewards/rna_reward_fn/mean": 0.547458291053772, | |
| "rewards/rna_reward_fn/std": 0.3604092001914978, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 482.0, | |
| "completions/max_terminated_length": 482.0, | |
| "completions/mean_length": 167.125, | |
| "completions/mean_terminated_length": 167.125, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.31340789794921875, | |
| "epoch": 0.09411764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4071066081523895, | |
| "learning_rate": 9.725490196078432e-07, | |
| "loss": -0.0, | |
| "num_tokens": 1215744.0, | |
| "reward": 0.5176310539245605, | |
| "reward_std": 0.3205966353416443, | |
| "rewards/rna_reward_fn/mean": 0.5176310539245605, | |
| "rewards/rna_reward_fn/std": 0.3642078638076782, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 151.59375, | |
| "completions/mean_terminated_length": 151.59375, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "entropy": 0.305365189909935, | |
| "epoch": 0.10588235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3989139795303345, | |
| "learning_rate": 9.686274509803921e-07, | |
| "loss": -0.0, | |
| "num_tokens": 1372000.0, | |
| "reward": 0.6008568406105042, | |
| "reward_std": 0.30818045139312744, | |
| "rewards/rna_reward_fn/mean": 0.6008569002151489, | |
| "rewards/rna_reward_fn/std": 0.35290631651878357, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 135.53125, | |
| "completions/mean_terminated_length": 135.53125, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "entropy": 0.2962174266576767, | |
| "epoch": 0.11764705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.44642144441604614, | |
| "learning_rate": 9.64705882352941e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1511808.0, | |
| "reward": 0.540717601776123, | |
| "reward_std": 0.3060719966888428, | |
| "rewards/rna_reward_fn/mean": 0.540717601776123, | |
| "rewards/rna_reward_fn/std": 0.36574023962020874, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 187.71875, | |
| "completions/mean_terminated_length": 187.71875, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.2934599667787552, | |
| "epoch": 0.12941176470588237, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3814420700073242, | |
| "learning_rate": 9.607843137254902e-07, | |
| "loss": -0.0, | |
| "num_tokens": 1705056.0, | |
| "reward": 0.6084277629852295, | |
| "reward_std": 0.3016743063926697, | |
| "rewards/rna_reward_fn/mean": 0.6084277629852295, | |
| "rewards/rna_reward_fn/std": 0.37008586525917053, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 123.65625, | |
| "completions/mean_terminated_length": 123.65625, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.28613443672657013, | |
| "epoch": 0.1411764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.34958702325820923, | |
| "learning_rate": 9.568627450980392e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1832704.0, | |
| "reward": 0.6017879247665405, | |
| "reward_std": 0.3006741404533386, | |
| "rewards/rna_reward_fn/mean": 0.6017879247665405, | |
| "rewards/rna_reward_fn/std": 0.35490649938583374, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 140.65625, | |
| "completions/mean_terminated_length": 140.65625, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "entropy": 0.277506560087204, | |
| "epoch": 0.15294117647058825, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5352854132652283, | |
| "learning_rate": 9.529411764705881e-07, | |
| "loss": 0.0, | |
| "num_tokens": 1977760.0, | |
| "reward": 0.571915328502655, | |
| "reward_std": 0.2985040843486786, | |
| "rewards/rna_reward_fn/mean": 0.5719153881072998, | |
| "rewards/rna_reward_fn/std": 0.3767135441303253, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 424.0, | |
| "completions/max_terminated_length": 424.0, | |
| "completions/mean_length": 154.03125, | |
| "completions/mean_terminated_length": 154.03125, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "entropy": 0.2907712608575821, | |
| "epoch": 0.16470588235294117, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.40935981273651123, | |
| "learning_rate": 9.490196078431371e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2136512.0, | |
| "reward": 0.5937778353691101, | |
| "reward_std": 0.270163893699646, | |
| "rewards/rna_reward_fn/mean": 0.5937778353691101, | |
| "rewards/rna_reward_fn/std": 0.3509018123149872, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 184.40625, | |
| "completions/mean_terminated_length": 184.40625, | |
| "completions/min_length": 17.0, | |
| "completions/min_terminated_length": 17.0, | |
| "entropy": 0.27846619486808777, | |
| "epoch": 0.17647058823529413, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.41769424080848694, | |
| "learning_rate": 9.450980392156862e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2326368.0, | |
| "reward": 0.6163018941879272, | |
| "reward_std": 0.26538053154945374, | |
| "rewards/rna_reward_fn/mean": 0.6163018941879272, | |
| "rewards/rna_reward_fn/std": 0.3496814966201782, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 389.0, | |
| "completions/max_terminated_length": 389.0, | |
| "completions/mean_length": 117.84375, | |
| "completions/mean_terminated_length": 117.84375, | |
| "completions/min_length": 36.0, | |
| "completions/min_terminated_length": 36.0, | |
| "entropy": 0.2604786157608032, | |
| "epoch": 0.18823529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3845226764678955, | |
| "learning_rate": 9.411764705882352e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2448064.0, | |
| "reward": 0.5925071239471436, | |
| "reward_std": 0.2943580150604248, | |
| "rewards/rna_reward_fn/mean": 0.5925071239471436, | |
| "rewards/rna_reward_fn/std": 0.3674796521663666, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 112.125, | |
| "completions/mean_terminated_length": 112.125, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "entropy": 0.25712524354457855, | |
| "epoch": 0.2, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.39543959498405457, | |
| "learning_rate": 9.372549019607843e-07, | |
| "loss": -0.0, | |
| "num_tokens": 2563904.0, | |
| "reward": 0.5904660224914551, | |
| "reward_std": 0.26803961396217346, | |
| "rewards/rna_reward_fn/mean": 0.5904660224914551, | |
| "rewards/rna_reward_fn/std": 0.3583122193813324, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 138.40625, | |
| "completions/mean_terminated_length": 138.40625, | |
| "completions/min_length": 34.0, | |
| "completions/min_terminated_length": 34.0, | |
| "entropy": 0.27494488656520844, | |
| "epoch": 0.21176470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.32191383838653564, | |
| "learning_rate": 9.333333333333333e-07, | |
| "loss": -0.0, | |
| "num_tokens": 2706656.0, | |
| "reward": 0.6467701196670532, | |
| "reward_std": 0.2634694576263428, | |
| "rewards/rna_reward_fn/mean": 0.6467701196670532, | |
| "rewards/rna_reward_fn/std": 0.3313148319721222, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 137.6875, | |
| "completions/mean_terminated_length": 137.6875, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "entropy": 0.260918065905571, | |
| "epoch": 0.2235294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4905475378036499, | |
| "learning_rate": 9.294117647058824e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2848672.0, | |
| "reward": 0.5871793031692505, | |
| "reward_std": 0.25154006481170654, | |
| "rewards/rna_reward_fn/mean": 0.5871793031692505, | |
| "rewards/rna_reward_fn/std": 0.3587729334831238, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 166.78125, | |
| "completions/mean_terminated_length": 166.78125, | |
| "completions/min_length": 31.0, | |
| "completions/min_terminated_length": 31.0, | |
| "entropy": 0.26801037788391113, | |
| "epoch": 0.23529411764705882, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7330372929573059, | |
| "learning_rate": 9.254901960784314e-07, | |
| "loss": -0.0, | |
| "num_tokens": 3020480.0, | |
| "reward": 0.5460379123687744, | |
| "reward_std": 0.27695512771606445, | |
| "rewards/rna_reward_fn/mean": 0.5460379123687744, | |
| "rewards/rna_reward_fn/std": 0.37495046854019165, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 142.6875, | |
| "completions/mean_terminated_length": 142.6875, | |
| "completions/min_length": 39.0, | |
| "completions/min_terminated_length": 39.0, | |
| "entropy": 0.26508544385433197, | |
| "epoch": 0.24705882352941178, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4575193524360657, | |
| "learning_rate": 9.215686274509803e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3167616.0, | |
| "reward": 0.6192805171012878, | |
| "reward_std": 0.2736813426017761, | |
| "rewards/rna_reward_fn/mean": 0.6192805171012878, | |
| "rewards/rna_reward_fn/std": 0.3539046049118042, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 154.25, | |
| "completions/mean_terminated_length": 154.25, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "entropy": 0.25467583537101746, | |
| "epoch": 0.25882352941176473, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.407436341047287, | |
| "learning_rate": 9.176470588235293e-07, | |
| "loss": 0.0, | |
| "num_tokens": 3326592.0, | |
| "reward": 0.5778753757476807, | |
| "reward_std": 0.27449485659599304, | |
| "rewards/rna_reward_fn/mean": 0.5778753757476807, | |
| "rewards/rna_reward_fn/std": 0.3692671060562134, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 376.0, | |
| "completions/max_terminated_length": 376.0, | |
| "completions/mean_length": 135.46875, | |
| "completions/mean_terminated_length": 135.46875, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.23743800073862076, | |
| "epoch": 0.27058823529411763, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.36481839418411255, | |
| "learning_rate": 9.137254901960783e-07, | |
| "loss": -0.0, | |
| "num_tokens": 3466336.0, | |
| "reward": 0.6230462193489075, | |
| "reward_std": 0.27385085821151733, | |
| "rewards/rna_reward_fn/mean": 0.6230462193489075, | |
| "rewards/rna_reward_fn/std": 0.35384857654571533, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 399.0, | |
| "completions/max_terminated_length": 399.0, | |
| "completions/mean_length": 159.25, | |
| "completions/mean_terminated_length": 159.25, | |
| "completions/min_length": 39.0, | |
| "completions/min_terminated_length": 39.0, | |
| "entropy": 0.2592047303915024, | |
| "epoch": 0.2823529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.40386804938316345, | |
| "learning_rate": 9.098039215686274e-07, | |
| "loss": -0.0, | |
| "num_tokens": 3630432.0, | |
| "reward": 0.587247908115387, | |
| "reward_std": 0.26836222410202026, | |
| "rewards/rna_reward_fn/mean": 0.587247908115387, | |
| "rewards/rna_reward_fn/std": 0.3811717927455902, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 400.0, | |
| "completions/max_terminated_length": 400.0, | |
| "completions/mean_length": 152.375, | |
| "completions/mean_terminated_length": 152.375, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.23664871603250504, | |
| "epoch": 0.29411764705882354, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.514864444732666, | |
| "learning_rate": 9.058823529411764e-07, | |
| "loss": -0.0, | |
| "num_tokens": 3787488.0, | |
| "reward": 0.6044737696647644, | |
| "reward_std": 0.2556478679180145, | |
| "rewards/rna_reward_fn/mean": 0.6044737696647644, | |
| "rewards/rna_reward_fn/std": 0.3558889329433441, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 418.0, | |
| "completions/max_terminated_length": 418.0, | |
| "completions/mean_length": 140.5, | |
| "completions/mean_terminated_length": 140.5, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.2437874600291252, | |
| "epoch": 0.3058823529411765, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4290100038051605, | |
| "learning_rate": 9.019607843137255e-07, | |
| "loss": -0.0, | |
| "num_tokens": 3932384.0, | |
| "reward": 0.583857536315918, | |
| "reward_std": 0.2450568526983261, | |
| "rewards/rna_reward_fn/mean": 0.583857536315918, | |
| "rewards/rna_reward_fn/std": 0.3653680384159088, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 472.0, | |
| "completions/max_terminated_length": 472.0, | |
| "completions/mean_length": 164.8125, | |
| "completions/mean_terminated_length": 164.8125, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.24944818764925003, | |
| "epoch": 0.3176470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.42284926772117615, | |
| "learning_rate": 8.980392156862745e-07, | |
| "loss": -0.0, | |
| "num_tokens": 4102176.0, | |
| "reward": 0.5925735235214233, | |
| "reward_std": 0.2968187630176544, | |
| "rewards/rna_reward_fn/mean": 0.5925735235214233, | |
| "rewards/rna_reward_fn/std": 0.3608212471008301, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 397.0, | |
| "completions/max_terminated_length": 397.0, | |
| "completions/mean_length": 146.1875, | |
| "completions/mean_terminated_length": 146.1875, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "entropy": 0.22080854326486588, | |
| "epoch": 0.32941176470588235, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4605961740016937, | |
| "learning_rate": 8.941176470588236e-07, | |
| "loss": 0.0, | |
| "num_tokens": 4252896.0, | |
| "reward": 0.5584173202514648, | |
| "reward_std": 0.2890748083591461, | |
| "rewards/rna_reward_fn/mean": 0.5584173202514648, | |
| "rewards/rna_reward_fn/std": 0.3958645462989807, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 175.90625, | |
| "completions/mean_terminated_length": 175.90625, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.2321019321680069, | |
| "epoch": 0.3411764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5582552552223206, | |
| "learning_rate": 8.901960784313724e-07, | |
| "loss": 0.0, | |
| "num_tokens": 4434048.0, | |
| "reward": 0.5966294407844543, | |
| "reward_std": 0.2823025584220886, | |
| "rewards/rna_reward_fn/mean": 0.5966294407844543, | |
| "rewards/rna_reward_fn/std": 0.3560717701911926, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 174.1875, | |
| "completions/mean_terminated_length": 174.1875, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.21510899811983109, | |
| "epoch": 0.35294117647058826, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.49061208963394165, | |
| "learning_rate": 8.862745098039215e-07, | |
| "loss": -0.0, | |
| "num_tokens": 4613440.0, | |
| "reward": 0.5848400592803955, | |
| "reward_std": 0.267974317073822, | |
| "rewards/rna_reward_fn/mean": 0.5848400592803955, | |
| "rewards/rna_reward_fn/std": 0.37775954604148865, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 490.0, | |
| "completions/max_terminated_length": 490.0, | |
| "completions/mean_length": 163.15625, | |
| "completions/mean_terminated_length": 163.15625, | |
| "completions/min_length": 32.0, | |
| "completions/min_terminated_length": 32.0, | |
| "entropy": 0.2507341653108597, | |
| "epoch": 0.36470588235294116, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.603717029094696, | |
| "learning_rate": 8.823529411764705e-07, | |
| "loss": 0.0, | |
| "num_tokens": 4781536.0, | |
| "reward": 0.6572607159614563, | |
| "reward_std": 0.2553848624229431, | |
| "rewards/rna_reward_fn/mean": 0.6572607159614563, | |
| "rewards/rna_reward_fn/std": 0.3443078398704529, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 170.34375, | |
| "completions/mean_terminated_length": 170.34375, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.2254045456647873, | |
| "epoch": 0.3764705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5129714608192444, | |
| "learning_rate": 8.784313725490196e-07, | |
| "loss": -0.0, | |
| "num_tokens": 4956992.0, | |
| "reward": 0.6237974762916565, | |
| "reward_std": 0.2781754732131958, | |
| "rewards/rna_reward_fn/mean": 0.6237974762916565, | |
| "rewards/rna_reward_fn/std": 0.37038782238960266, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 500.0, | |
| "completions/max_terminated_length": 500.0, | |
| "completions/mean_length": 140.96875, | |
| "completions/mean_terminated_length": 140.96875, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.23444515466690063, | |
| "epoch": 0.38823529411764707, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5718296766281128, | |
| "learning_rate": 8.745098039215686e-07, | |
| "loss": -0.0, | |
| "num_tokens": 5102368.0, | |
| "reward": 0.663845956325531, | |
| "reward_std": 0.23731249570846558, | |
| "rewards/rna_reward_fn/mean": 0.6638458967208862, | |
| "rewards/rna_reward_fn/std": 0.3386061191558838, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 426.0, | |
| "completions/max_terminated_length": 426.0, | |
| "completions/mean_length": 135.84375, | |
| "completions/mean_terminated_length": 135.84375, | |
| "completions/min_length": 52.0, | |
| "completions/min_terminated_length": 52.0, | |
| "entropy": 0.21551413834095, | |
| "epoch": 0.4, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.48484402894973755, | |
| "learning_rate": 8.705882352941177e-07, | |
| "loss": 0.0, | |
| "num_tokens": 5242496.0, | |
| "reward": 0.5733575224876404, | |
| "reward_std": 0.2985653281211853, | |
| "rewards/rna_reward_fn/mean": 0.5733575224876404, | |
| "rewards/rna_reward_fn/std": 0.3665997385978699, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 128.34375, | |
| "completions/mean_terminated_length": 128.34375, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.19232773780822754, | |
| "epoch": 0.4117647058823529, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3821423351764679, | |
| "learning_rate": 8.666666666666667e-07, | |
| "loss": 0.0, | |
| "num_tokens": 5374944.0, | |
| "reward": 0.6459628939628601, | |
| "reward_std": 0.27456825971603394, | |
| "rewards/rna_reward_fn/mean": 0.6459628939628601, | |
| "rewards/rna_reward_fn/std": 0.3492187559604645, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 117.28125, | |
| "completions/mean_terminated_length": 117.28125, | |
| "completions/min_length": 16.0, | |
| "completions/min_terminated_length": 16.0, | |
| "entropy": 0.2170068845152855, | |
| "epoch": 0.4235294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.519378125667572, | |
| "learning_rate": 8.627450980392156e-07, | |
| "loss": -0.0, | |
| "num_tokens": 5496064.0, | |
| "reward": 0.6556386947631836, | |
| "reward_std": 0.2442726194858551, | |
| "rewards/rna_reward_fn/mean": 0.6556386947631836, | |
| "rewards/rna_reward_fn/std": 0.3574485182762146, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 143.75, | |
| "completions/mean_terminated_length": 143.75, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "entropy": 0.23470622301101685, | |
| "epoch": 0.43529411764705883, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4268864691257477, | |
| "learning_rate": 8.588235294117646e-07, | |
| "loss": 0.0, | |
| "num_tokens": 5644288.0, | |
| "reward": 0.6998727917671204, | |
| "reward_std": 0.2536011040210724, | |
| "rewards/rna_reward_fn/mean": 0.6998728513717651, | |
| "rewards/rna_reward_fn/std": 0.34483227133750916, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 161.84375, | |
| "completions/mean_terminated_length": 161.84375, | |
| "completions/min_length": 38.0, | |
| "completions/min_terminated_length": 38.0, | |
| "entropy": 0.20661279559135437, | |
| "epoch": 0.4470588235294118, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.49551478028297424, | |
| "learning_rate": 8.549019607843136e-07, | |
| "loss": 0.0, | |
| "num_tokens": 5811040.0, | |
| "reward": 0.60715651512146, | |
| "reward_std": 0.2498263716697693, | |
| "rewards/rna_reward_fn/mean": 0.60715651512146, | |
| "rewards/rna_reward_fn/std": 0.3692743182182312, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 169.09375, | |
| "completions/mean_terminated_length": 169.09375, | |
| "completions/min_length": 38.0, | |
| "completions/min_terminated_length": 38.0, | |
| "entropy": 0.22686513513326645, | |
| "epoch": 0.4588235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.539655864238739, | |
| "learning_rate": 8.509803921568627e-07, | |
| "loss": 0.0, | |
| "num_tokens": 5985216.0, | |
| "reward": 0.606254518032074, | |
| "reward_std": 0.27362608909606934, | |
| "rewards/rna_reward_fn/mean": 0.606254518032074, | |
| "rewards/rna_reward_fn/std": 0.37834590673446655, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 158.625, | |
| "completions/mean_terminated_length": 158.625, | |
| "completions/min_length": 43.0, | |
| "completions/min_terminated_length": 43.0, | |
| "entropy": 0.20522872358560562, | |
| "epoch": 0.47058823529411764, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4427753686904907, | |
| "learning_rate": 8.470588235294117e-07, | |
| "loss": 0.0, | |
| "num_tokens": 6148672.0, | |
| "reward": 0.6244011521339417, | |
| "reward_std": 0.2686484158039093, | |
| "rewards/rna_reward_fn/mean": 0.6244011521339417, | |
| "rewards/rna_reward_fn/std": 0.3721536099910736, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 165.5, | |
| "completions/mean_terminated_length": 165.5, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.22500251233577728, | |
| "epoch": 0.4823529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.8924270272254944, | |
| "learning_rate": 8.431372549019608e-07, | |
| "loss": 0.0, | |
| "num_tokens": 6319168.0, | |
| "reward": 0.5321128368377686, | |
| "reward_std": 0.29077643156051636, | |
| "rewards/rna_reward_fn/mean": 0.5321128368377686, | |
| "rewards/rna_reward_fn/std": 0.3840348422527313, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 160.90625, | |
| "completions/mean_terminated_length": 160.90625, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.23232445865869522, | |
| "epoch": 0.49411764705882355, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4376697540283203, | |
| "learning_rate": 8.392156862745098e-07, | |
| "loss": 0.0, | |
| "num_tokens": 6484960.0, | |
| "reward": 0.6353960037231445, | |
| "reward_std": 0.2474566251039505, | |
| "rewards/rna_reward_fn/mean": 0.6353960037231445, | |
| "rewards/rna_reward_fn/std": 0.3577839136123657, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 156.9375, | |
| "completions/mean_terminated_length": 156.9375, | |
| "completions/min_length": 30.0, | |
| "completions/min_terminated_length": 30.0, | |
| "entropy": 0.21899814903736115, | |
| "epoch": 0.5058823529411764, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5491610765457153, | |
| "learning_rate": 8.352941176470589e-07, | |
| "loss": -0.0, | |
| "num_tokens": 6646688.0, | |
| "reward": 0.6090617775917053, | |
| "reward_std": 0.2399156093597412, | |
| "rewards/rna_reward_fn/mean": 0.6090618371963501, | |
| "rewards/rna_reward_fn/std": 0.35401132702827454, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 161.28125, | |
| "completions/mean_terminated_length": 161.28125, | |
| "completions/min_length": 43.0, | |
| "completions/min_terminated_length": 43.0, | |
| "entropy": 0.2018352746963501, | |
| "epoch": 0.5176470588235295, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4728248715400696, | |
| "learning_rate": 8.313725490196078e-07, | |
| "loss": -0.0, | |
| "num_tokens": 6812864.0, | |
| "reward": 0.5414500832557678, | |
| "reward_std": 0.257457435131073, | |
| "rewards/rna_reward_fn/mean": 0.5414501428604126, | |
| "rewards/rna_reward_fn/std": 0.37554678320884705, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 144.53125, | |
| "completions/mean_terminated_length": 144.53125, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.21590139716863632, | |
| "epoch": 0.5294117647058824, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.45613518357276917, | |
| "learning_rate": 8.274509803921567e-07, | |
| "loss": 0.0, | |
| "num_tokens": 6961888.0, | |
| "reward": 0.5840362310409546, | |
| "reward_std": 0.24920199811458588, | |
| "rewards/rna_reward_fn/mean": 0.5840362310409546, | |
| "rewards/rna_reward_fn/std": 0.3838988244533539, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 151.59375, | |
| "completions/mean_terminated_length": 151.59375, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.20446214824914932, | |
| "epoch": 0.5411764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4725431799888611, | |
| "learning_rate": 8.235294117647058e-07, | |
| "loss": 0.0, | |
| "num_tokens": 7118144.0, | |
| "reward": 0.5587388277053833, | |
| "reward_std": 0.25771480798721313, | |
| "rewards/rna_reward_fn/mean": 0.5587388277053833, | |
| "rewards/rna_reward_fn/std": 0.3881581127643585, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 472.0, | |
| "completions/max_terminated_length": 472.0, | |
| "completions/mean_length": 148.09375, | |
| "completions/mean_terminated_length": 148.09375, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.20715581625699997, | |
| "epoch": 0.5529411764705883, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5775709748268127, | |
| "learning_rate": 8.196078431372548e-07, | |
| "loss": -0.0, | |
| "num_tokens": 7270816.0, | |
| "reward": 0.6535854935646057, | |
| "reward_std": 0.23074793815612793, | |
| "rewards/rna_reward_fn/mean": 0.6535854339599609, | |
| "rewards/rna_reward_fn/std": 0.35560858249664307, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 148.25, | |
| "completions/mean_terminated_length": 148.25, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.20631568133831024, | |
| "epoch": 0.5647058823529412, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5872889161109924, | |
| "learning_rate": 8.156862745098039e-07, | |
| "loss": -0.0, | |
| "num_tokens": 7423648.0, | |
| "reward": 0.5795817375183105, | |
| "reward_std": 0.26122066378593445, | |
| "rewards/rna_reward_fn/mean": 0.5795817375183105, | |
| "rewards/rna_reward_fn/std": 0.3758288025856018, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 362.0, | |
| "completions/max_terminated_length": 362.0, | |
| "completions/mean_length": 124.71875, | |
| "completions/mean_terminated_length": 124.71875, | |
| "completions/min_length": 31.0, | |
| "completions/min_terminated_length": 31.0, | |
| "entropy": 0.19562938064336777, | |
| "epoch": 0.5764705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.450082391500473, | |
| "learning_rate": 8.117647058823529e-07, | |
| "loss": 0.0, | |
| "num_tokens": 7552384.0, | |
| "reward": 0.657599925994873, | |
| "reward_std": 0.24575895071029663, | |
| "rewards/rna_reward_fn/mean": 0.657599925994873, | |
| "rewards/rna_reward_fn/std": 0.31881189346313477, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 144.53125, | |
| "completions/mean_terminated_length": 144.53125, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.212866373360157, | |
| "epoch": 0.5882352941176471, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4694586992263794, | |
| "learning_rate": 8.07843137254902e-07, | |
| "loss": -0.0, | |
| "num_tokens": 7701408.0, | |
| "reward": 0.5784563422203064, | |
| "reward_std": 0.2643548846244812, | |
| "rewards/rna_reward_fn/mean": 0.5784563422203064, | |
| "rewards/rna_reward_fn/std": 0.3683941066265106, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 138.21875, | |
| "completions/mean_terminated_length": 138.21875, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.17988762259483337, | |
| "epoch": 0.6, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.44588983058929443, | |
| "learning_rate": 8.03921568627451e-07, | |
| "loss": 0.0, | |
| "num_tokens": 7843968.0, | |
| "reward": 0.6563807725906372, | |
| "reward_std": 0.2578202784061432, | |
| "rewards/rna_reward_fn/mean": 0.6563807725906372, | |
| "rewards/rna_reward_fn/std": 0.3404718339443207, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 181.0, | |
| "completions/mean_terminated_length": 181.0, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "entropy": 0.22444826364517212, | |
| "epoch": 0.611764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.49978184700012207, | |
| "learning_rate": 8e-07, | |
| "loss": -0.0, | |
| "num_tokens": 8030336.0, | |
| "reward": 0.6426054239273071, | |
| "reward_std": 0.2517712712287903, | |
| "rewards/rna_reward_fn/mean": 0.6426054239273071, | |
| "rewards/rna_reward_fn/std": 0.3629717528820038, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 185.40625, | |
| "completions/mean_terminated_length": 185.40625, | |
| "completions/min_length": 30.0, | |
| "completions/min_terminated_length": 30.0, | |
| "entropy": 0.20722465217113495, | |
| "epoch": 0.6235294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6321276426315308, | |
| "learning_rate": 7.960784313725489e-07, | |
| "loss": -0.0, | |
| "num_tokens": 8221216.0, | |
| "reward": 0.7105848789215088, | |
| "reward_std": 0.23574814200401306, | |
| "rewards/rna_reward_fn/mean": 0.7105848789215088, | |
| "rewards/rna_reward_fn/std": 0.3385322690010071, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 358.0, | |
| "completions/max_terminated_length": 358.0, | |
| "completions/mean_length": 148.125, | |
| "completions/mean_terminated_length": 148.125, | |
| "completions/min_length": 30.0, | |
| "completions/min_terminated_length": 30.0, | |
| "entropy": 0.19676074385643005, | |
| "epoch": 0.6352941176470588, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.49395662546157837, | |
| "learning_rate": 7.92156862745098e-07, | |
| "loss": 0.0, | |
| "num_tokens": 8373920.0, | |
| "reward": 0.5770894885063171, | |
| "reward_std": 0.2644929885864258, | |
| "rewards/rna_reward_fn/mean": 0.5770894289016724, | |
| "rewards/rna_reward_fn/std": 0.3790797293186188, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 159.6875, | |
| "completions/mean_terminated_length": 159.6875, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.18705828487873077, | |
| "epoch": 0.6470588235294118, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4197390079498291, | |
| "learning_rate": 7.88235294117647e-07, | |
| "loss": 0.0, | |
| "num_tokens": 8538464.0, | |
| "reward": 0.5764464139938354, | |
| "reward_std": 0.21550722420215607, | |
| "rewards/rna_reward_fn/mean": 0.5764464139938354, | |
| "rewards/rna_reward_fn/std": 0.364503413438797, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 146.125, | |
| "completions/mean_terminated_length": 146.125, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.21118487417697906, | |
| "epoch": 0.6588235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.37146326899528503, | |
| "learning_rate": 7.84313725490196e-07, | |
| "loss": 0.0, | |
| "num_tokens": 8689120.0, | |
| "reward": 0.6104137897491455, | |
| "reward_std": 0.23754771053791046, | |
| "rewards/rna_reward_fn/mean": 0.6104137897491455, | |
| "rewards/rna_reward_fn/std": 0.3665221333503723, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 160.65625, | |
| "completions/mean_terminated_length": 160.65625, | |
| "completions/min_length": 41.0, | |
| "completions/min_terminated_length": 41.0, | |
| "entropy": 0.1945827156305313, | |
| "epoch": 0.6705882352941176, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4072308838367462, | |
| "learning_rate": 7.803921568627451e-07, | |
| "loss": 0.0, | |
| "num_tokens": 8854656.0, | |
| "reward": 0.6713041067123413, | |
| "reward_std": 0.2212895005941391, | |
| "rewards/rna_reward_fn/mean": 0.6713041067123413, | |
| "rewards/rna_reward_fn/std": 0.3392506539821625, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 142.125, | |
| "completions/mean_terminated_length": 142.125, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "entropy": 0.18257632106542587, | |
| "epoch": 0.6823529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4942987561225891, | |
| "learning_rate": 7.764705882352941e-07, | |
| "loss": 0.0, | |
| "num_tokens": 9001216.0, | |
| "reward": 0.6629120707511902, | |
| "reward_std": 0.22726097702980042, | |
| "rewards/rna_reward_fn/mean": 0.6629120707511902, | |
| "rewards/rna_reward_fn/std": 0.31348657608032227, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 147.03125, | |
| "completions/mean_terminated_length": 147.03125, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "entropy": 0.20158874243497849, | |
| "epoch": 0.6941176470588235, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5187806487083435, | |
| "learning_rate": 7.725490196078432e-07, | |
| "loss": 0.0, | |
| "num_tokens": 9152800.0, | |
| "reward": 0.6476730108261108, | |
| "reward_std": 0.24552714824676514, | |
| "rewards/rna_reward_fn/mean": 0.6476730108261108, | |
| "rewards/rna_reward_fn/std": 0.33643367886543274, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 399.0, | |
| "completions/max_terminated_length": 399.0, | |
| "completions/mean_length": 159.4375, | |
| "completions/mean_terminated_length": 159.4375, | |
| "completions/min_length": 28.0, | |
| "completions/min_terminated_length": 28.0, | |
| "entropy": 0.18591003119945526, | |
| "epoch": 0.7058823529411765, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.36044302582740784, | |
| "learning_rate": 7.686274509803921e-07, | |
| "loss": 0.0, | |
| "num_tokens": 9317088.0, | |
| "reward": 0.6832787394523621, | |
| "reward_std": 0.22806429862976074, | |
| "rewards/rna_reward_fn/mean": 0.6832787394523621, | |
| "rewards/rna_reward_fn/std": 0.32348689436912537, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 474.0, | |
| "completions/max_terminated_length": 474.0, | |
| "completions/mean_length": 160.96875, | |
| "completions/mean_terminated_length": 160.96875, | |
| "completions/min_length": 36.0, | |
| "completions/min_terminated_length": 36.0, | |
| "entropy": 0.21002116054296494, | |
| "epoch": 0.7176470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5378114581108093, | |
| "learning_rate": 7.647058823529411e-07, | |
| "loss": -0.0, | |
| "num_tokens": 9482944.0, | |
| "reward": 0.6531599760055542, | |
| "reward_std": 0.22567519545555115, | |
| "rewards/rna_reward_fn/mean": 0.653160035610199, | |
| "rewards/rna_reward_fn/std": 0.33769848942756653, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 348.0, | |
| "completions/max_terminated_length": 348.0, | |
| "completions/mean_length": 116.75, | |
| "completions/mean_terminated_length": 116.75, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "entropy": 0.18150582909584045, | |
| "epoch": 0.7294117647058823, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.39785146713256836, | |
| "learning_rate": 7.607843137254901e-07, | |
| "loss": -0.0, | |
| "num_tokens": 9603520.0, | |
| "reward": 0.565564751625061, | |
| "reward_std": 0.2807776927947998, | |
| "rewards/rna_reward_fn/mean": 0.565564751625061, | |
| "rewards/rna_reward_fn/std": 0.38936248421669006, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 391.0, | |
| "completions/max_terminated_length": 391.0, | |
| "completions/mean_length": 147.78125, | |
| "completions/mean_terminated_length": 147.78125, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.189855195581913, | |
| "epoch": 0.7411764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4473720192909241, | |
| "learning_rate": 7.568627450980392e-07, | |
| "loss": -0.0, | |
| "num_tokens": 9755872.0, | |
| "reward": 0.6822654008865356, | |
| "reward_std": 0.23419374227523804, | |
| "rewards/rna_reward_fn/mean": 0.6822654008865356, | |
| "rewards/rna_reward_fn/std": 0.32637539505958557, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 171.28125, | |
| "completions/mean_terminated_length": 171.28125, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "entropy": 0.19365741312503815, | |
| "epoch": 0.7529411764705882, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5170744061470032, | |
| "learning_rate": 7.529411764705882e-07, | |
| "loss": -0.0, | |
| "num_tokens": 9932288.0, | |
| "reward": 0.6570923328399658, | |
| "reward_std": 0.24268731474876404, | |
| "rewards/rna_reward_fn/mean": 0.6570923328399658, | |
| "rewards/rna_reward_fn/std": 0.3360862731933594, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 400.0, | |
| "completions/max_terminated_length": 400.0, | |
| "completions/mean_length": 138.5625, | |
| "completions/mean_terminated_length": 138.5625, | |
| "completions/min_length": 50.0, | |
| "completions/min_terminated_length": 50.0, | |
| "entropy": 0.15700556337833405, | |
| "epoch": 0.7647058823529411, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.669632077217102, | |
| "learning_rate": 7.490196078431373e-07, | |
| "loss": -0.0, | |
| "num_tokens": 10075200.0, | |
| "reward": 0.5884541273117065, | |
| "reward_std": 0.25077739357948303, | |
| "rewards/rna_reward_fn/mean": 0.5884541869163513, | |
| "rewards/rna_reward_fn/std": 0.3707042634487152, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 147.875, | |
| "completions/mean_terminated_length": 147.875, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.1868809014558792, | |
| "epoch": 0.7764705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.496881365776062, | |
| "learning_rate": 7.450980392156863e-07, | |
| "loss": -0.0, | |
| "num_tokens": 10227648.0, | |
| "reward": 0.6189287900924683, | |
| "reward_std": 0.23646032810211182, | |
| "rewards/rna_reward_fn/mean": 0.6189287900924683, | |
| "rewards/rna_reward_fn/std": 0.3614950180053711, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 127.75, | |
| "completions/mean_terminated_length": 127.75, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "entropy": 0.17434925585985184, | |
| "epoch": 0.788235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5550652742385864, | |
| "learning_rate": 7.411764705882352e-07, | |
| "loss": 0.0, | |
| "num_tokens": 10359488.0, | |
| "reward": 0.5918734073638916, | |
| "reward_std": 0.2727334499359131, | |
| "rewards/rna_reward_fn/mean": 0.5918734073638916, | |
| "rewards/rna_reward_fn/std": 0.35672324895858765, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 151.96875, | |
| "completions/mean_terminated_length": 151.96875, | |
| "completions/min_length": 20.0, | |
| "completions/min_terminated_length": 20.0, | |
| "entropy": 0.17505493760108948, | |
| "epoch": 0.8, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3833948075771332, | |
| "learning_rate": 7.372549019607843e-07, | |
| "loss": -0.0, | |
| "num_tokens": 10516128.0, | |
| "reward": 0.7000205516815186, | |
| "reward_std": 0.23740704357624054, | |
| "rewards/rna_reward_fn/mean": 0.7000205516815186, | |
| "rewards/rna_reward_fn/std": 0.3234153985977173, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 141.15625, | |
| "completions/mean_terminated_length": 141.15625, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.17628953605890274, | |
| "epoch": 0.8117647058823529, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3673928678035736, | |
| "learning_rate": 7.333333333333332e-07, | |
| "loss": 0.0, | |
| "num_tokens": 10661696.0, | |
| "reward": 0.6538941860198975, | |
| "reward_std": 0.19288064539432526, | |
| "rewards/rna_reward_fn/mean": 0.6538941860198975, | |
| "rewards/rna_reward_fn/std": 0.3515564203262329, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 399.0, | |
| "completions/max_terminated_length": 399.0, | |
| "completions/mean_length": 195.53125, | |
| "completions/mean_terminated_length": 195.53125, | |
| "completions/min_length": 17.0, | |
| "completions/min_terminated_length": 17.0, | |
| "entropy": 0.18974752724170685, | |
| "epoch": 0.8235294117647058, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.719358503818512, | |
| "learning_rate": 7.294117647058823e-07, | |
| "loss": -0.0, | |
| "num_tokens": 10862944.0, | |
| "reward": 0.5886421203613281, | |
| "reward_std": 0.23114809393882751, | |
| "rewards/rna_reward_fn/mean": 0.5886421203613281, | |
| "rewards/rna_reward_fn/std": 0.36729925870895386, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 156.46875, | |
| "completions/mean_terminated_length": 156.46875, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.17211396992206573, | |
| "epoch": 0.8352941176470589, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4566245377063751, | |
| "learning_rate": 7.254901960784313e-07, | |
| "loss": 0.0, | |
| "num_tokens": 11024192.0, | |
| "reward": 0.6206304430961609, | |
| "reward_std": 0.20096182823181152, | |
| "rewards/rna_reward_fn/mean": 0.6206304430961609, | |
| "rewards/rna_reward_fn/std": 0.3349648714065552, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 124.84375, | |
| "completions/mean_terminated_length": 124.84375, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.16766826063394547, | |
| "epoch": 0.8470588235294118, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4164656698703766, | |
| "learning_rate": 7.215686274509804e-07, | |
| "loss": -0.0, | |
| "num_tokens": 11153056.0, | |
| "reward": 0.6351762413978577, | |
| "reward_std": 0.2213377058506012, | |
| "rewards/rna_reward_fn/mean": 0.6351762413978577, | |
| "rewards/rna_reward_fn/std": 0.3493310809135437, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 412.0, | |
| "completions/max_terminated_length": 412.0, | |
| "completions/mean_length": 129.65625, | |
| "completions/mean_terminated_length": 129.65625, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.16023673117160797, | |
| "epoch": 0.8588235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.41499394178390503, | |
| "learning_rate": 7.176470588235294e-07, | |
| "loss": 0.0, | |
| "num_tokens": 11286848.0, | |
| "reward": 0.6752070784568787, | |
| "reward_std": 0.24617840349674225, | |
| "rewards/rna_reward_fn/mean": 0.6752070784568787, | |
| "rewards/rna_reward_fn/std": 0.34732139110565186, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 164.9375, | |
| "completions/mean_terminated_length": 164.9375, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.18363939225673676, | |
| "epoch": 0.8705882352941177, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.45577237010002136, | |
| "learning_rate": 7.137254901960785e-07, | |
| "loss": -0.0, | |
| "num_tokens": 11456768.0, | |
| "reward": 0.5772933959960938, | |
| "reward_std": 0.23847423493862152, | |
| "rewards/rna_reward_fn/mean": 0.5772933959960938, | |
| "rewards/rna_reward_fn/std": 0.3823261260986328, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 492.0, | |
| "completions/max_terminated_length": 492.0, | |
| "completions/mean_length": 188.28125, | |
| "completions/mean_terminated_length": 188.28125, | |
| "completions/min_length": 55.0, | |
| "completions/min_terminated_length": 55.0, | |
| "entropy": 0.1838960349559784, | |
| "epoch": 0.8823529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5237012505531311, | |
| "learning_rate": 7.098039215686274e-07, | |
| "loss": 0.0, | |
| "num_tokens": 11650592.0, | |
| "reward": 0.6181286573410034, | |
| "reward_std": 0.2555590569972992, | |
| "rewards/rna_reward_fn/mean": 0.6181286573410034, | |
| "rewards/rna_reward_fn/std": 0.37019652128219604, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 182.1875, | |
| "completions/mean_terminated_length": 182.1875, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.1790659874677658, | |
| "epoch": 0.8941176470588236, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4818723499774933, | |
| "learning_rate": 7.058823529411765e-07, | |
| "loss": -0.0, | |
| "num_tokens": 11838176.0, | |
| "reward": 0.578412652015686, | |
| "reward_std": 0.22860457003116608, | |
| "rewards/rna_reward_fn/mean": 0.578412652015686, | |
| "rewards/rna_reward_fn/std": 0.35265785455703735, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 200.21875, | |
| "completions/mean_terminated_length": 200.21875, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "entropy": 0.18565233796834946, | |
| "epoch": 0.9058823529411765, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7948216795921326, | |
| "learning_rate": 7.019607843137254e-07, | |
| "loss": 0.0, | |
| "num_tokens": 12044224.0, | |
| "reward": 0.6187993288040161, | |
| "reward_std": 0.2622474431991577, | |
| "rewards/rna_reward_fn/mean": 0.6187993288040161, | |
| "rewards/rna_reward_fn/std": 0.326750248670578, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 145.8125, | |
| "completions/mean_terminated_length": 145.8125, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "entropy": 0.17154797911643982, | |
| "epoch": 0.9176470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.47565799951553345, | |
| "learning_rate": 6.980392156862744e-07, | |
| "loss": -0.0, | |
| "num_tokens": 12194560.0, | |
| "reward": 0.5971746444702148, | |
| "reward_std": 0.18512360751628876, | |
| "rewards/rna_reward_fn/mean": 0.5971747040748596, | |
| "rewards/rna_reward_fn/std": 0.3710518777370453, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.0, | |
| "completions/max_terminated_length": 373.0, | |
| "completions/mean_length": 128.71875, | |
| "completions/mean_terminated_length": 128.71875, | |
| "completions/min_length": 34.0, | |
| "completions/min_terminated_length": 34.0, | |
| "entropy": 0.15196984261274338, | |
| "epoch": 0.9294117647058824, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4943343698978424, | |
| "learning_rate": 6.941176470588235e-07, | |
| "loss": -0.0, | |
| "num_tokens": 12327392.0, | |
| "reward": 0.6471496820449829, | |
| "reward_std": 0.22329822182655334, | |
| "rewards/rna_reward_fn/mean": 0.6471496820449829, | |
| "rewards/rna_reward_fn/std": 0.33536407351493835, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 400.0, | |
| "completions/max_terminated_length": 400.0, | |
| "completions/mean_length": 137.84375, | |
| "completions/mean_terminated_length": 137.84375, | |
| "completions/min_length": 28.0, | |
| "completions/min_terminated_length": 28.0, | |
| "entropy": 0.16948848217725754, | |
| "epoch": 0.9411764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4759957492351532, | |
| "learning_rate": 6.901960784313725e-07, | |
| "loss": -0.0, | |
| "num_tokens": 12469568.0, | |
| "reward": 0.659608006477356, | |
| "reward_std": 0.18602336943149567, | |
| "rewards/rna_reward_fn/mean": 0.659608006477356, | |
| "rewards/rna_reward_fn/std": 0.3731914460659027, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 146.75, | |
| "completions/mean_terminated_length": 146.75, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "entropy": 0.18501683324575424, | |
| "epoch": 0.9529411764705882, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.43785735964775085, | |
| "learning_rate": 6.862745098039216e-07, | |
| "loss": 0.0, | |
| "num_tokens": 12620864.0, | |
| "reward": 0.620478630065918, | |
| "reward_std": 0.22393935918807983, | |
| "rewards/rna_reward_fn/mean": 0.620478630065918, | |
| "rewards/rna_reward_fn/std": 0.35981276631355286, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 150.1875, | |
| "completions/mean_terminated_length": 150.1875, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "entropy": 0.1829531416296959, | |
| "epoch": 0.9647058823529412, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4392038583755493, | |
| "learning_rate": 6.823529411764706e-07, | |
| "loss": 0.0, | |
| "num_tokens": 12775680.0, | |
| "reward": 0.6712214350700378, | |
| "reward_std": 0.2174052894115448, | |
| "rewards/rna_reward_fn/mean": 0.6712214946746826, | |
| "rewards/rna_reward_fn/std": 0.3370954990386963, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 391.0, | |
| "completions/max_terminated_length": 391.0, | |
| "completions/mean_length": 141.8125, | |
| "completions/mean_terminated_length": 141.8125, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.1686822921037674, | |
| "epoch": 0.9764705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4484212398529053, | |
| "learning_rate": 6.784313725490196e-07, | |
| "loss": -0.0, | |
| "num_tokens": 12921920.0, | |
| "reward": 0.6464422345161438, | |
| "reward_std": 0.2250806838274002, | |
| "rewards/rna_reward_fn/mean": 0.6464422345161438, | |
| "rewards/rna_reward_fn/std": 0.3622319996356964, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 166.65625, | |
| "completions/mean_terminated_length": 166.65625, | |
| "completions/min_length": 28.0, | |
| "completions/min_terminated_length": 28.0, | |
| "entropy": 0.17645781487226486, | |
| "epoch": 0.9882352941176471, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7668079137802124, | |
| "learning_rate": 6.745098039215686e-07, | |
| "loss": 0.0, | |
| "num_tokens": 13093600.0, | |
| "reward": 0.6832870244979858, | |
| "reward_std": 0.25750601291656494, | |
| "rewards/rna_reward_fn/mean": 0.6832869648933411, | |
| "rewards/rna_reward_fn/std": 0.3430787920951843, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 167.96875, | |
| "completions/mean_terminated_length": 167.96875, | |
| "completions/min_length": 16.0, | |
| "completions/min_terminated_length": 16.0, | |
| "entropy": 0.17668870836496353, | |
| "epoch": 1.0, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.43097105622291565, | |
| "learning_rate": 6.705882352941176e-07, | |
| "loss": 0.0, | |
| "num_tokens": 13266624.0, | |
| "reward": 0.5539568662643433, | |
| "reward_std": 0.22693298757076263, | |
| "rewards/rna_reward_fn/mean": 0.5539568066596985, | |
| "rewards/rna_reward_fn/std": 0.38347697257995605, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 182.3125, | |
| "completions/mean_terminated_length": 182.3125, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "entropy": 0.1827656850218773, | |
| "epoch": 1.011764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5608375668525696, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 0.0, | |
| "num_tokens": 13454336.0, | |
| "reward": 0.7320628762245178, | |
| "reward_std": 0.22256582975387573, | |
| "rewards/rna_reward_fn/mean": 0.7320628762245178, | |
| "rewards/rna_reward_fn/std": 0.30846187472343445, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 140.625, | |
| "completions/mean_terminated_length": 140.625, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.18483393639326096, | |
| "epoch": 1.0235294117647058, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4667685627937317, | |
| "learning_rate": 6.627450980392156e-07, | |
| "loss": 0.0, | |
| "num_tokens": 13599360.0, | |
| "reward": 0.6894385814666748, | |
| "reward_std": 0.20523157715797424, | |
| "rewards/rna_reward_fn/mean": 0.6894385814666748, | |
| "rewards/rna_reward_fn/std": 0.3155847191810608, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 150.46875, | |
| "completions/mean_terminated_length": 150.46875, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.16182925552129745, | |
| "epoch": 1.035294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6056375503540039, | |
| "learning_rate": 6.588235294117647e-07, | |
| "loss": -0.0, | |
| "num_tokens": 13754464.0, | |
| "reward": 0.6177388429641724, | |
| "reward_std": 0.24611341953277588, | |
| "rewards/rna_reward_fn/mean": 0.6177388429641724, | |
| "rewards/rna_reward_fn/std": 0.3494950830936432, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 144.09375, | |
| "completions/mean_terminated_length": 144.09375, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.17024414986371994, | |
| "epoch": 1.0470588235294118, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4357620179653168, | |
| "learning_rate": 6.549019607843137e-07, | |
| "loss": -0.0, | |
| "num_tokens": 13903040.0, | |
| "reward": 0.611262857913971, | |
| "reward_std": 0.19428220391273499, | |
| "rewards/rna_reward_fn/mean": 0.611262857913971, | |
| "rewards/rna_reward_fn/std": 0.3793390393257141, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 132.5625, | |
| "completions/mean_terminated_length": 132.5625, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.16757714748382568, | |
| "epoch": 1.0588235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.440759539604187, | |
| "learning_rate": 6.509803921568627e-07, | |
| "loss": -0.0, | |
| "num_tokens": 14039808.0, | |
| "reward": 0.6882448196411133, | |
| "reward_std": 0.19556942582130432, | |
| "rewards/rna_reward_fn/mean": 0.6882448196411133, | |
| "rewards/rna_reward_fn/std": 0.32508718967437744, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 143.78125, | |
| "completions/mean_terminated_length": 143.78125, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.1645500287413597, | |
| "epoch": 1.0705882352941176, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5613058805465698, | |
| "learning_rate": 6.470588235294117e-07, | |
| "loss": 0.0, | |
| "num_tokens": 14188064.0, | |
| "reward": 0.6789584159851074, | |
| "reward_std": 0.19199398159980774, | |
| "rewards/rna_reward_fn/mean": 0.6789584159851074, | |
| "rewards/rna_reward_fn/std": 0.3482169210910797, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 399.0, | |
| "completions/max_terminated_length": 399.0, | |
| "completions/mean_length": 118.34375, | |
| "completions/mean_terminated_length": 118.34375, | |
| "completions/min_length": 31.0, | |
| "completions/min_terminated_length": 31.0, | |
| "entropy": 0.14176590740680695, | |
| "epoch": 1.0823529411764705, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4092370867729187, | |
| "learning_rate": 6.431372549019608e-07, | |
| "loss": -0.0, | |
| "num_tokens": 14310272.0, | |
| "reward": 0.650740921497345, | |
| "reward_std": 0.18103614449501038, | |
| "rewards/rna_reward_fn/mean": 0.650740921497345, | |
| "rewards/rna_reward_fn/std": 0.32734215259552, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 154.3125, | |
| "completions/mean_terminated_length": 154.3125, | |
| "completions/min_length": 28.0, | |
| "completions/min_terminated_length": 28.0, | |
| "entropy": 0.176346056163311, | |
| "epoch": 1.0941176470588236, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4459090232849121, | |
| "learning_rate": 6.392156862745097e-07, | |
| "loss": 0.0, | |
| "num_tokens": 14469312.0, | |
| "reward": 0.6732466816902161, | |
| "reward_std": 0.22345304489135742, | |
| "rewards/rna_reward_fn/mean": 0.6732466816902161, | |
| "rewards/rna_reward_fn/std": 0.3369784951210022, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 145.3125, | |
| "completions/mean_terminated_length": 145.3125, | |
| "completions/min_length": 34.0, | |
| "completions/min_terminated_length": 34.0, | |
| "entropy": 0.1685405969619751, | |
| "epoch": 1.1058823529411765, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5043669939041138, | |
| "learning_rate": 6.352941176470588e-07, | |
| "loss": -0.0, | |
| "num_tokens": 14619136.0, | |
| "reward": 0.677271842956543, | |
| "reward_std": 0.20296773314476013, | |
| "rewards/rna_reward_fn/mean": 0.677271842956543, | |
| "rewards/rna_reward_fn/std": 0.320669025182724, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 170.1875, | |
| "completions/mean_terminated_length": 170.1875, | |
| "completions/min_length": 34.0, | |
| "completions/min_terminated_length": 34.0, | |
| "entropy": 0.18431222438812256, | |
| "epoch": 1.1176470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6736860275268555, | |
| "learning_rate": 6.313725490196078e-07, | |
| "loss": -0.0, | |
| "num_tokens": 14794432.0, | |
| "reward": 0.6684234738349915, | |
| "reward_std": 0.259125292301178, | |
| "rewards/rna_reward_fn/mean": 0.6684235334396362, | |
| "rewards/rna_reward_fn/std": 0.34210121631622314, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 157.6875, | |
| "completions/mean_terminated_length": 157.6875, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.16836901009082794, | |
| "epoch": 1.1294117647058823, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4569699764251709, | |
| "learning_rate": 6.274509803921569e-07, | |
| "loss": -0.0, | |
| "num_tokens": 14956928.0, | |
| "reward": 0.68538498878479, | |
| "reward_std": 0.1874302327632904, | |
| "rewards/rna_reward_fn/mean": 0.68538498878479, | |
| "rewards/rna_reward_fn/std": 0.295845091342926, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 393.0, | |
| "completions/max_terminated_length": 393.0, | |
| "completions/mean_length": 140.21875, | |
| "completions/mean_terminated_length": 140.21875, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "entropy": 0.158738911151886, | |
| "epoch": 1.1411764705882352, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4725809395313263, | |
| "learning_rate": 6.235294117647059e-07, | |
| "loss": 0.0, | |
| "num_tokens": 15101536.0, | |
| "reward": 0.6654532551765442, | |
| "reward_std": 0.18864062428474426, | |
| "rewards/rna_reward_fn/mean": 0.6654532551765442, | |
| "rewards/rna_reward_fn/std": 0.3371845781803131, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 162.8125, | |
| "completions/mean_terminated_length": 162.8125, | |
| "completions/min_length": 42.0, | |
| "completions/min_terminated_length": 42.0, | |
| "entropy": 0.17738928645849228, | |
| "epoch": 1.1529411764705881, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5798309445381165, | |
| "learning_rate": 6.196078431372548e-07, | |
| "loss": -0.0, | |
| "num_tokens": 15269280.0, | |
| "reward": 0.7147358655929565, | |
| "reward_std": 0.21203583478927612, | |
| "rewards/rna_reward_fn/mean": 0.7147358655929565, | |
| "rewards/rna_reward_fn/std": 0.33255505561828613, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 168.03125, | |
| "completions/mean_terminated_length": 168.03125, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.17116892337799072, | |
| "epoch": 1.1647058823529413, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5520632863044739, | |
| "learning_rate": 6.156862745098039e-07, | |
| "loss": -0.0, | |
| "num_tokens": 15442368.0, | |
| "reward": 0.6365219950675964, | |
| "reward_std": 0.20218491554260254, | |
| "rewards/rna_reward_fn/mean": 0.6365219950675964, | |
| "rewards/rna_reward_fn/std": 0.35175827145576477, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 397.0, | |
| "completions/max_terminated_length": 397.0, | |
| "completions/mean_length": 138.0, | |
| "completions/mean_terminated_length": 138.0, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.17306677252054214, | |
| "epoch": 1.1764705882352942, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4389256238937378, | |
| "learning_rate": 6.117647058823529e-07, | |
| "loss": 0.0, | |
| "num_tokens": 15584704.0, | |
| "reward": 0.7388399839401245, | |
| "reward_std": 0.16607630252838135, | |
| "rewards/rna_reward_fn/mean": 0.7388399839401245, | |
| "rewards/rna_reward_fn/std": 0.2576732635498047, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 392.0, | |
| "completions/max_terminated_length": 392.0, | |
| "completions/mean_length": 137.40625, | |
| "completions/mean_terminated_length": 137.40625, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "entropy": 0.15397901087999344, | |
| "epoch": 1.188235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5594757795333862, | |
| "learning_rate": 6.078431372549019e-07, | |
| "loss": -0.0, | |
| "num_tokens": 15726432.0, | |
| "reward": 0.7157045602798462, | |
| "reward_std": 0.22128766775131226, | |
| "rewards/rna_reward_fn/mean": 0.7157045602798462, | |
| "rewards/rna_reward_fn/std": 0.2969537079334259, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 400.0, | |
| "completions/max_terminated_length": 400.0, | |
| "completions/mean_length": 127.78125, | |
| "completions/mean_terminated_length": 127.78125, | |
| "completions/min_length": 28.0, | |
| "completions/min_terminated_length": 28.0, | |
| "entropy": 0.17225481569766998, | |
| "epoch": 1.2, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.40622541308403015, | |
| "learning_rate": 6.039215686274509e-07, | |
| "loss": -0.0, | |
| "num_tokens": 15858304.0, | |
| "reward": 0.7043038010597229, | |
| "reward_std": 0.22727924585342407, | |
| "rewards/rna_reward_fn/mean": 0.7043038606643677, | |
| "rewards/rna_reward_fn/std": 0.33978909254074097, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 167.625, | |
| "completions/mean_terminated_length": 167.625, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "entropy": 0.17464321851730347, | |
| "epoch": 1.2117647058823529, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4603181779384613, | |
| "learning_rate": 6e-07, | |
| "loss": -0.0, | |
| "num_tokens": 16030976.0, | |
| "reward": 0.61054527759552, | |
| "reward_std": 0.22179073095321655, | |
| "rewards/rna_reward_fn/mean": 0.61054527759552, | |
| "rewards/rna_reward_fn/std": 0.37210676074028015, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 156.8125, | |
| "completions/mean_terminated_length": 156.8125, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "entropy": 0.1658085659146309, | |
| "epoch": 1.223529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4843849539756775, | |
| "learning_rate": 5.96078431372549e-07, | |
| "loss": -0.0, | |
| "num_tokens": 16192576.0, | |
| "reward": 0.6978532075881958, | |
| "reward_std": 0.1981123685836792, | |
| "rewards/rna_reward_fn/mean": 0.6978532671928406, | |
| "rewards/rna_reward_fn/std": 0.3141247630119324, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 181.1875, | |
| "completions/mean_terminated_length": 181.1875, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.16212371736764908, | |
| "epoch": 1.2352941176470589, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5290284752845764, | |
| "learning_rate": 5.921568627450981e-07, | |
| "loss": 0.0, | |
| "num_tokens": 16379136.0, | |
| "reward": 0.6463083028793335, | |
| "reward_std": 0.1896321177482605, | |
| "rewards/rna_reward_fn/mean": 0.6463083028793335, | |
| "rewards/rna_reward_fn/std": 0.36457034945487976, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 282.0, | |
| "completions/max_terminated_length": 282.0, | |
| "completions/mean_length": 124.3125, | |
| "completions/mean_terminated_length": 124.3125, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "entropy": 0.15162574499845505, | |
| "epoch": 1.2470588235294118, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.47445422410964966, | |
| "learning_rate": 5.88235294117647e-07, | |
| "loss": 0.0, | |
| "num_tokens": 16507456.0, | |
| "reward": 0.672465980052948, | |
| "reward_std": 0.20273976027965546, | |
| "rewards/rna_reward_fn/mean": 0.6724659204483032, | |
| "rewards/rna_reward_fn/std": 0.3352026343345642, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 154.25, | |
| "completions/mean_terminated_length": 154.25, | |
| "completions/min_length": 32.0, | |
| "completions/min_terminated_length": 32.0, | |
| "entropy": 0.1651393622159958, | |
| "epoch": 1.2588235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.48081472516059875, | |
| "learning_rate": 5.843137254901961e-07, | |
| "loss": -0.0, | |
| "num_tokens": 16666432.0, | |
| "reward": 0.6745295524597168, | |
| "reward_std": 0.21466964483261108, | |
| "rewards/rna_reward_fn/mean": 0.6745295524597168, | |
| "rewards/rna_reward_fn/std": 0.3604423701763153, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 176.34375, | |
| "completions/mean_terminated_length": 176.34375, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.16943742334842682, | |
| "epoch": 1.2705882352941176, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4931647777557373, | |
| "learning_rate": 5.803921568627451e-07, | |
| "loss": 0.0, | |
| "num_tokens": 16848032.0, | |
| "reward": 0.6875256896018982, | |
| "reward_std": 0.2435401976108551, | |
| "rewards/rna_reward_fn/mean": 0.6875256896018982, | |
| "rewards/rna_reward_fn/std": 0.3279384672641754, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 158.09375, | |
| "completions/mean_terminated_length": 158.09375, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.17465446144342422, | |
| "epoch": 1.2823529411764705, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5001822113990784, | |
| "learning_rate": 5.76470588235294e-07, | |
| "loss": 0.0, | |
| "num_tokens": 17010944.0, | |
| "reward": 0.6029446125030518, | |
| "reward_std": 0.1757221221923828, | |
| "rewards/rna_reward_fn/mean": 0.6029446125030518, | |
| "rewards/rna_reward_fn/std": 0.35652756690979004, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 426.0, | |
| "completions/max_terminated_length": 426.0, | |
| "completions/mean_length": 167.40625, | |
| "completions/mean_terminated_length": 167.40625, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.16541431099176407, | |
| "epoch": 1.2941176470588236, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4689631760120392, | |
| "learning_rate": 5.725490196078431e-07, | |
| "loss": -0.0, | |
| "num_tokens": 17183392.0, | |
| "reward": 0.6704152226448059, | |
| "reward_std": 0.20997245609760284, | |
| "rewards/rna_reward_fn/mean": 0.6704152226448059, | |
| "rewards/rna_reward_fn/std": 0.32471874356269836, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 398.0, | |
| "completions/max_terminated_length": 398.0, | |
| "completions/mean_length": 141.71875, | |
| "completions/mean_terminated_length": 141.71875, | |
| "completions/min_length": 31.0, | |
| "completions/min_terminated_length": 31.0, | |
| "entropy": 0.1647869274020195, | |
| "epoch": 1.3058823529411765, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5760033130645752, | |
| "learning_rate": 5.686274509803921e-07, | |
| "loss": -0.0, | |
| "num_tokens": 17329536.0, | |
| "reward": 0.6938682198524475, | |
| "reward_std": 0.20044496655464172, | |
| "rewards/rna_reward_fn/mean": 0.6938682198524475, | |
| "rewards/rna_reward_fn/std": 0.32881274819374084, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 115.96875, | |
| "completions/mean_terminated_length": 115.96875, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.1390109360218048, | |
| "epoch": 1.3176470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5902699828147888, | |
| "learning_rate": 5.647058823529412e-07, | |
| "loss": 0.0, | |
| "num_tokens": 17449312.0, | |
| "reward": 0.651271402835846, | |
| "reward_std": 0.17913030087947845, | |
| "rewards/rna_reward_fn/mean": 0.651271402835846, | |
| "rewards/rna_reward_fn/std": 0.3490009009838104, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 472.0, | |
| "completions/max_terminated_length": 472.0, | |
| "completions/mean_length": 179.8125, | |
| "completions/mean_terminated_length": 179.8125, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.16215970367193222, | |
| "epoch": 1.3294117647058823, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6261849403381348, | |
| "learning_rate": 5.607843137254902e-07, | |
| "loss": -0.0, | |
| "num_tokens": 17634464.0, | |
| "reward": 0.6400759220123291, | |
| "reward_std": 0.2095731794834137, | |
| "rewards/rna_reward_fn/mean": 0.6400759220123291, | |
| "rewards/rna_reward_fn/std": 0.34743088483810425, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 139.59375, | |
| "completions/mean_terminated_length": 139.59375, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "entropy": 0.17950539290905, | |
| "epoch": 1.3411764705882354, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4431358277797699, | |
| "learning_rate": 5.568627450980392e-07, | |
| "loss": 0.0, | |
| "num_tokens": 17778432.0, | |
| "reward": 0.7148804068565369, | |
| "reward_std": 0.19681406021118164, | |
| "rewards/rna_reward_fn/mean": 0.7148803472518921, | |
| "rewards/rna_reward_fn/std": 0.2995694577693939, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 482.0, | |
| "completions/max_terminated_length": 482.0, | |
| "completions/mean_length": 167.6875, | |
| "completions/mean_terminated_length": 167.6875, | |
| "completions/min_length": 47.0, | |
| "completions/min_terminated_length": 47.0, | |
| "entropy": 0.16394728422164917, | |
| "epoch": 1.3529411764705883, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4245275557041168, | |
| "learning_rate": 5.529411764705882e-07, | |
| "loss": -0.0, | |
| "num_tokens": 17951168.0, | |
| "reward": 0.6865168213844299, | |
| "reward_std": 0.21481367945671082, | |
| "rewards/rna_reward_fn/mean": 0.6865168213844299, | |
| "rewards/rna_reward_fn/std": 0.3217703402042389, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 146.6875, | |
| "completions/mean_terminated_length": 146.6875, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.16379400342702866, | |
| "epoch": 1.3647058823529412, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7668678760528564, | |
| "learning_rate": 5.490196078431373e-07, | |
| "loss": -0.0, | |
| "num_tokens": 18102400.0, | |
| "reward": 0.7100426554679871, | |
| "reward_std": 0.20684288442134857, | |
| "rewards/rna_reward_fn/mean": 0.7100426554679871, | |
| "rewards/rna_reward_fn/std": 0.32808709144592285, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 147.40625, | |
| "completions/mean_terminated_length": 147.40625, | |
| "completions/min_length": 43.0, | |
| "completions/min_terminated_length": 43.0, | |
| "entropy": 0.16369594633579254, | |
| "epoch": 1.3764705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4491204619407654, | |
| "learning_rate": 5.450980392156862e-07, | |
| "loss": -0.0, | |
| "num_tokens": 18254368.0, | |
| "reward": 0.6345921754837036, | |
| "reward_std": 0.17989099025726318, | |
| "rewards/rna_reward_fn/mean": 0.6345921754837036, | |
| "rewards/rna_reward_fn/std": 0.3739507794380188, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 128.96875, | |
| "completions/mean_terminated_length": 128.96875, | |
| "completions/min_length": 36.0, | |
| "completions/min_terminated_length": 36.0, | |
| "entropy": 0.16341928392648697, | |
| "epoch": 1.388235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.40218448638916016, | |
| "learning_rate": 5.411764705882353e-07, | |
| "loss": 0.0, | |
| "num_tokens": 18387456.0, | |
| "reward": 0.6973093748092651, | |
| "reward_std": 0.19106432795524597, | |
| "rewards/rna_reward_fn/mean": 0.6973093748092651, | |
| "rewards/rna_reward_fn/std": 0.328565388917923, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 185.3125, | |
| "completions/mean_terminated_length": 185.3125, | |
| "completions/min_length": 55.0, | |
| "completions/min_terminated_length": 55.0, | |
| "entropy": 0.15643662959337234, | |
| "epoch": 1.4, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4641011953353882, | |
| "learning_rate": 5.372549019607843e-07, | |
| "loss": -0.0, | |
| "num_tokens": 18578240.0, | |
| "reward": 0.6982426643371582, | |
| "reward_std": 0.17999790608882904, | |
| "rewards/rna_reward_fn/mean": 0.6982426643371582, | |
| "rewards/rna_reward_fn/std": 0.3187488615512848, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 500.0, | |
| "completions/max_terminated_length": 500.0, | |
| "completions/mean_length": 151.125, | |
| "completions/mean_terminated_length": 151.125, | |
| "completions/min_length": 51.0, | |
| "completions/min_terminated_length": 51.0, | |
| "entropy": 0.16167542338371277, | |
| "epoch": 1.4117647058823528, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4710671305656433, | |
| "learning_rate": 5.333333333333333e-07, | |
| "loss": -0.0, | |
| "num_tokens": 18734016.0, | |
| "reward": 0.765220046043396, | |
| "reward_std": 0.16310608386993408, | |
| "rewards/rna_reward_fn/mean": 0.765220046043396, | |
| "rewards/rna_reward_fn/std": 0.30073776841163635, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 200.53125, | |
| "completions/mean_terminated_length": 200.53125, | |
| "completions/min_length": 42.0, | |
| "completions/min_terminated_length": 42.0, | |
| "entropy": 0.17333289235830307, | |
| "epoch": 1.423529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5605267882347107, | |
| "learning_rate": 5.294117647058823e-07, | |
| "loss": -0.0, | |
| "num_tokens": 18940384.0, | |
| "reward": 0.6207563877105713, | |
| "reward_std": 0.2605891227722168, | |
| "rewards/rna_reward_fn/mean": 0.6207563877105713, | |
| "rewards/rna_reward_fn/std": 0.35733622312545776, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 378.0, | |
| "completions/max_terminated_length": 378.0, | |
| "completions/mean_length": 126.90625, | |
| "completions/mean_terminated_length": 126.90625, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.16177111864089966, | |
| "epoch": 1.4352941176470588, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5492433905601501, | |
| "learning_rate": 5.254901960784313e-07, | |
| "loss": 0.0, | |
| "num_tokens": 19071360.0, | |
| "reward": 0.6156597137451172, | |
| "reward_std": 0.2084151953458786, | |
| "rewards/rna_reward_fn/mean": 0.6156597137451172, | |
| "rewards/rna_reward_fn/std": 0.3588009178638458, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 389.0, | |
| "completions/max_terminated_length": 389.0, | |
| "completions/mean_length": 126.15625, | |
| "completions/mean_terminated_length": 126.15625, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.1655115783214569, | |
| "epoch": 1.4470588235294117, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5015555024147034, | |
| "learning_rate": 5.215686274509804e-07, | |
| "loss": 0.0, | |
| "num_tokens": 19201568.0, | |
| "reward": 0.6790971755981445, | |
| "reward_std": 0.20820938050746918, | |
| "rewards/rna_reward_fn/mean": 0.6790972352027893, | |
| "rewards/rna_reward_fn/std": 0.33763545751571655, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 153.75, | |
| "completions/mean_terminated_length": 153.75, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "entropy": 0.1595897227525711, | |
| "epoch": 1.4588235294117646, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5314822793006897, | |
| "learning_rate": 5.176470588235294e-07, | |
| "loss": 0.0, | |
| "num_tokens": 19360032.0, | |
| "reward": 0.6510605812072754, | |
| "reward_std": 0.18497204780578613, | |
| "rewards/rna_reward_fn/mean": 0.6510605812072754, | |
| "rewards/rna_reward_fn/std": 0.3650972247123718, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 134.5625, | |
| "completions/mean_terminated_length": 134.5625, | |
| "completions/min_length": 34.0, | |
| "completions/min_terminated_length": 34.0, | |
| "entropy": 0.1490706205368042, | |
| "epoch": 1.4705882352941178, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5578471422195435, | |
| "learning_rate": 5.137254901960784e-07, | |
| "loss": -0.0, | |
| "num_tokens": 19498848.0, | |
| "reward": 0.6481872797012329, | |
| "reward_std": 0.19116738438606262, | |
| "rewards/rna_reward_fn/mean": 0.6481872797012329, | |
| "rewards/rna_reward_fn/std": 0.32832634449005127, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 186.0625, | |
| "completions/mean_terminated_length": 186.0625, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.16315071284770966, | |
| "epoch": 1.4823529411764707, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.47001388669013977, | |
| "learning_rate": 5.098039215686274e-07, | |
| "loss": 0.0, | |
| "num_tokens": 19690400.0, | |
| "reward": 0.6869475245475769, | |
| "reward_std": 0.21966272592544556, | |
| "rewards/rna_reward_fn/mean": 0.6869475245475769, | |
| "rewards/rna_reward_fn/std": 0.3061429262161255, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 159.25, | |
| "completions/mean_terminated_length": 159.25, | |
| "completions/min_length": 36.0, | |
| "completions/min_terminated_length": 36.0, | |
| "entropy": 0.1544899046421051, | |
| "epoch": 1.4941176470588236, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7163305878639221, | |
| "learning_rate": 5.058823529411765e-07, | |
| "loss": 0.0, | |
| "num_tokens": 19854496.0, | |
| "reward": 0.7104751467704773, | |
| "reward_std": 0.17693877220153809, | |
| "rewards/rna_reward_fn/mean": 0.7104751467704773, | |
| "rewards/rna_reward_fn/std": 0.30990538001060486, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 134.6875, | |
| "completions/mean_terminated_length": 134.6875, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "entropy": 0.16278471052646637, | |
| "epoch": 1.5058823529411764, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7567697167396545, | |
| "learning_rate": 5.019607843137255e-07, | |
| "loss": -0.0, | |
| "num_tokens": 19993440.0, | |
| "reward": 0.6815826296806335, | |
| "reward_std": 0.20137576758861542, | |
| "rewards/rna_reward_fn/mean": 0.6815826296806335, | |
| "rewards/rna_reward_fn/std": 0.32526591420173645, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 142.5625, | |
| "completions/mean_terminated_length": 142.5625, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "entropy": 0.16126833856105804, | |
| "epoch": 1.5176470588235293, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5958517789840698, | |
| "learning_rate": 4.980392156862744e-07, | |
| "loss": 0.0, | |
| "num_tokens": 20140448.0, | |
| "reward": 0.6496865153312683, | |
| "reward_std": 0.23397710919380188, | |
| "rewards/rna_reward_fn/mean": 0.6496865153312683, | |
| "rewards/rna_reward_fn/std": 0.3660079836845398, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 418.0, | |
| "completions/max_terminated_length": 418.0, | |
| "completions/mean_length": 178.3125, | |
| "completions/mean_terminated_length": 178.3125, | |
| "completions/min_length": 46.0, | |
| "completions/min_terminated_length": 46.0, | |
| "entropy": 0.16705547273159027, | |
| "epoch": 1.5294117647058822, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5045768618583679, | |
| "learning_rate": 4.941176470588235e-07, | |
| "loss": 0.0, | |
| "num_tokens": 20324064.0, | |
| "reward": 0.6084290146827698, | |
| "reward_std": 0.22301070392131805, | |
| "rewards/rna_reward_fn/mean": 0.608428955078125, | |
| "rewards/rna_reward_fn/std": 0.37412387132644653, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 178.21875, | |
| "completions/mean_terminated_length": 178.21875, | |
| "completions/min_length": 38.0, | |
| "completions/min_terminated_length": 38.0, | |
| "entropy": 0.16225259751081467, | |
| "epoch": 1.5411764705882351, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4790975749492645, | |
| "learning_rate": 4.901960784313725e-07, | |
| "loss": -0.0, | |
| "num_tokens": 20507584.0, | |
| "reward": 0.6834284067153931, | |
| "reward_std": 0.16327084600925446, | |
| "rewards/rna_reward_fn/mean": 0.6834284067153931, | |
| "rewards/rna_reward_fn/std": 0.3331601321697235, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 124.46875, | |
| "completions/mean_terminated_length": 124.46875, | |
| "completions/min_length": 17.0, | |
| "completions/min_terminated_length": 17.0, | |
| "entropy": 0.14231518656015396, | |
| "epoch": 1.5529411764705883, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.45782116055488586, | |
| "learning_rate": 4.862745098039216e-07, | |
| "loss": -0.0, | |
| "num_tokens": 20636064.0, | |
| "reward": 0.6696175336837769, | |
| "reward_std": 0.1951877474784851, | |
| "rewards/rna_reward_fn/mean": 0.6696175336837769, | |
| "rewards/rna_reward_fn/std": 0.3469404876232147, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 153.09375, | |
| "completions/mean_terminated_length": 153.09375, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.14148423075675964, | |
| "epoch": 1.5647058823529412, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6516547203063965, | |
| "learning_rate": 4.823529411764705e-07, | |
| "loss": -0.0, | |
| "num_tokens": 20793856.0, | |
| "reward": 0.6711336374282837, | |
| "reward_std": 0.2223963439464569, | |
| "rewards/rna_reward_fn/mean": 0.6711336374282837, | |
| "rewards/rna_reward_fn/std": 0.3334668278694153, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 144.34375, | |
| "completions/mean_terminated_length": 144.34375, | |
| "completions/min_length": 20.0, | |
| "completions/min_terminated_length": 20.0, | |
| "entropy": 0.1529795005917549, | |
| "epoch": 1.576470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5148042440414429, | |
| "learning_rate": 4.784313725490196e-07, | |
| "loss": 0.0, | |
| "num_tokens": 20942688.0, | |
| "reward": 0.759110152721405, | |
| "reward_std": 0.16160593926906586, | |
| "rewards/rna_reward_fn/mean": 0.7591102123260498, | |
| "rewards/rna_reward_fn/std": 0.2931617796421051, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 339.0, | |
| "completions/max_terminated_length": 339.0, | |
| "completions/mean_length": 108.34375, | |
| "completions/mean_terminated_length": 108.34375, | |
| "completions/min_length": 28.0, | |
| "completions/min_terminated_length": 28.0, | |
| "entropy": 0.1443817839026451, | |
| "epoch": 1.5882352941176472, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.42829352617263794, | |
| "learning_rate": 4.7450980392156857e-07, | |
| "loss": -0.0, | |
| "num_tokens": 21054656.0, | |
| "reward": 0.6639102697372437, | |
| "reward_std": 0.20781482756137848, | |
| "rewards/rna_reward_fn/mean": 0.6639102697372437, | |
| "rewards/rna_reward_fn/std": 0.3437131941318512, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 175.03125, | |
| "completions/mean_terminated_length": 175.03125, | |
| "completions/min_length": 28.0, | |
| "completions/min_terminated_length": 28.0, | |
| "entropy": 0.15896137803792953, | |
| "epoch": 1.6, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5342750549316406, | |
| "learning_rate": 4.705882352941176e-07, | |
| "loss": 0.0, | |
| "num_tokens": 21234912.0, | |
| "reward": 0.6274444460868835, | |
| "reward_std": 0.22071924805641174, | |
| "rewards/rna_reward_fn/mean": 0.6274445056915283, | |
| "rewards/rna_reward_fn/std": 0.3473777174949646, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 398.0, | |
| "completions/max_terminated_length": 398.0, | |
| "completions/mean_length": 143.65625, | |
| "completions/mean_terminated_length": 143.65625, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.15408551692962646, | |
| "epoch": 1.611764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.49438202381134033, | |
| "learning_rate": 4.6666666666666666e-07, | |
| "loss": 0.0, | |
| "num_tokens": 21383040.0, | |
| "reward": 0.6316537857055664, | |
| "reward_std": 0.1621330976486206, | |
| "rewards/rna_reward_fn/mean": 0.6316537857055664, | |
| "rewards/rna_reward_fn/std": 0.34947502613067627, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 392.0, | |
| "completions/max_terminated_length": 392.0, | |
| "completions/mean_length": 168.84375, | |
| "completions/mean_terminated_length": 168.84375, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.17249725759029388, | |
| "epoch": 1.6235294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5168977379798889, | |
| "learning_rate": 4.627450980392157e-07, | |
| "loss": -0.0, | |
| "num_tokens": 21556960.0, | |
| "reward": 0.7472211122512817, | |
| "reward_std": 0.16369092464447021, | |
| "rewards/rna_reward_fn/mean": 0.7472211122512817, | |
| "rewards/rna_reward_fn/std": 0.27173811197280884, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 157.1875, | |
| "completions/mean_terminated_length": 157.1875, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.16690535098314285, | |
| "epoch": 1.6352941176470588, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5558773875236511, | |
| "learning_rate": 4.5882352941176465e-07, | |
| "loss": 0.0, | |
| "num_tokens": 21718944.0, | |
| "reward": 0.6854004859924316, | |
| "reward_std": 0.19929495453834534, | |
| "rewards/rna_reward_fn/mean": 0.6854004859924316, | |
| "rewards/rna_reward_fn/std": 0.31646665930747986, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 152.59375, | |
| "completions/mean_terminated_length": 152.59375, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.1484585627913475, | |
| "epoch": 1.6470588235294117, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.9384368062019348, | |
| "learning_rate": 4.549019607843137e-07, | |
| "loss": -0.0, | |
| "num_tokens": 21876224.0, | |
| "reward": 0.6835744380950928, | |
| "reward_std": 0.1949320137500763, | |
| "rewards/rna_reward_fn/mean": 0.6835744380950928, | |
| "rewards/rna_reward_fn/std": 0.35554417967796326, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 400.0, | |
| "completions/max_terminated_length": 400.0, | |
| "completions/mean_length": 127.875, | |
| "completions/mean_terminated_length": 127.875, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "entropy": 0.14056292921304703, | |
| "epoch": 1.6588235294117646, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4758838713169098, | |
| "learning_rate": 4.5098039215686274e-07, | |
| "loss": 0.0, | |
| "num_tokens": 22008192.0, | |
| "reward": 0.7035012245178223, | |
| "reward_std": 0.18292057514190674, | |
| "rewards/rna_reward_fn/mean": 0.703501284122467, | |
| "rewards/rna_reward_fn/std": 0.29926764965057373, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 164.59375, | |
| "completions/mean_terminated_length": 164.59375, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.1475282907485962, | |
| "epoch": 1.6705882352941175, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5269675254821777, | |
| "learning_rate": 4.470588235294118e-07, | |
| "loss": -0.0, | |
| "num_tokens": 22177760.0, | |
| "reward": 0.724274754524231, | |
| "reward_std": 0.20411115884780884, | |
| "rewards/rna_reward_fn/mean": 0.724274754524231, | |
| "rewards/rna_reward_fn/std": 0.29461607336997986, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 166.09375, | |
| "completions/mean_terminated_length": 166.09375, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "entropy": 0.14830049872398376, | |
| "epoch": 1.6823529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5128397345542908, | |
| "learning_rate": 4.4313725490196073e-07, | |
| "loss": 0.0, | |
| "num_tokens": 22348864.0, | |
| "reward": 0.6864579916000366, | |
| "reward_std": 0.18042539060115814, | |
| "rewards/rna_reward_fn/mean": 0.6864579916000366, | |
| "rewards/rna_reward_fn/std": 0.3156171441078186, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 358.0, | |
| "completions/max_terminated_length": 358.0, | |
| "completions/mean_length": 121.21875, | |
| "completions/mean_terminated_length": 121.21875, | |
| "completions/min_length": 38.0, | |
| "completions/min_terminated_length": 38.0, | |
| "entropy": 0.14306584745645523, | |
| "epoch": 1.6941176470588235, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4526241421699524, | |
| "learning_rate": 4.392156862745098e-07, | |
| "loss": 0.0, | |
| "num_tokens": 22474016.0, | |
| "reward": 0.6906402111053467, | |
| "reward_std": 0.2201388031244278, | |
| "rewards/rna_reward_fn/mean": 0.6906402111053467, | |
| "rewards/rna_reward_fn/std": 0.3415301740169525, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 398.0, | |
| "completions/max_terminated_length": 398.0, | |
| "completions/mean_length": 111.0625, | |
| "completions/mean_terminated_length": 111.0625, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.14087412506341934, | |
| "epoch": 1.7058823529411766, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4583019018173218, | |
| "learning_rate": 4.352941176470588e-07, | |
| "loss": 0.0, | |
| "num_tokens": 22588768.0, | |
| "reward": 0.7702864408493042, | |
| "reward_std": 0.1817162036895752, | |
| "rewards/rna_reward_fn/mean": 0.7702864408493042, | |
| "rewards/rna_reward_fn/std": 0.28576594591140747, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 152.46875, | |
| "completions/mean_terminated_length": 152.46875, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.13646821677684784, | |
| "epoch": 1.7176470588235295, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5821676850318909, | |
| "learning_rate": 4.313725490196078e-07, | |
| "loss": -0.0, | |
| "num_tokens": 22745920.0, | |
| "reward": 0.6735475659370422, | |
| "reward_std": 0.2079792022705078, | |
| "rewards/rna_reward_fn/mean": 0.6735475659370422, | |
| "rewards/rna_reward_fn/std": 0.34127116203308105, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 398.0, | |
| "completions/max_terminated_length": 398.0, | |
| "completions/mean_length": 137.0625, | |
| "completions/mean_terminated_length": 137.0625, | |
| "completions/min_length": 17.0, | |
| "completions/min_terminated_length": 17.0, | |
| "entropy": 0.1294446587562561, | |
| "epoch": 1.7294117647058824, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.47053244709968567, | |
| "learning_rate": 4.274509803921568e-07, | |
| "loss": 0.0, | |
| "num_tokens": 22887296.0, | |
| "reward": 0.7310217618942261, | |
| "reward_std": 0.16372641921043396, | |
| "rewards/rna_reward_fn/mean": 0.7310217618942261, | |
| "rewards/rna_reward_fn/std": 0.29399389028549194, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 164.03125, | |
| "completions/mean_terminated_length": 164.03125, | |
| "completions/min_length": 16.0, | |
| "completions/min_terminated_length": 16.0, | |
| "entropy": 0.16281016170978546, | |
| "epoch": 1.7411764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5588626861572266, | |
| "learning_rate": 4.2352941176470586e-07, | |
| "loss": 0.0, | |
| "num_tokens": 23056288.0, | |
| "reward": 0.654833197593689, | |
| "reward_std": 0.1884084939956665, | |
| "rewards/rna_reward_fn/mean": 0.654833197593689, | |
| "rewards/rna_reward_fn/std": 0.3517378270626068, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 140.84375, | |
| "completions/mean_terminated_length": 140.84375, | |
| "completions/min_length": 20.0, | |
| "completions/min_terminated_length": 20.0, | |
| "entropy": 0.15908341854810715, | |
| "epoch": 1.7529411764705882, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5507121086120605, | |
| "learning_rate": 4.196078431372549e-07, | |
| "loss": 0.0, | |
| "num_tokens": 23201536.0, | |
| "reward": 0.699113667011261, | |
| "reward_std": 0.20187973976135254, | |
| "rewards/rna_reward_fn/mean": 0.699113667011261, | |
| "rewards/rna_reward_fn/std": 0.3249177634716034, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 400.0, | |
| "completions/max_terminated_length": 400.0, | |
| "completions/mean_length": 192.4375, | |
| "completions/mean_terminated_length": 192.4375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "entropy": 0.15749355405569077, | |
| "epoch": 1.7647058823529411, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.47758468985557556, | |
| "learning_rate": 4.156862745098039e-07, | |
| "loss": 0.0, | |
| "num_tokens": 23399616.0, | |
| "reward": 0.6602087020874023, | |
| "reward_std": 0.2426632046699524, | |
| "rewards/rna_reward_fn/mean": 0.6602087020874023, | |
| "rewards/rna_reward_fn/std": 0.3394790291786194, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 186.375, | |
| "completions/mean_terminated_length": 186.375, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.1590714380145073, | |
| "epoch": 1.776470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5084402561187744, | |
| "learning_rate": 4.117647058823529e-07, | |
| "loss": 0.0, | |
| "num_tokens": 23591488.0, | |
| "reward": 0.6650402545928955, | |
| "reward_std": 0.18303653597831726, | |
| "rewards/rna_reward_fn/mean": 0.6650401949882507, | |
| "rewards/rna_reward_fn/std": 0.33965203166007996, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 141.40625, | |
| "completions/mean_terminated_length": 141.40625, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "entropy": 0.14213567227125168, | |
| "epoch": 1.788235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5413779020309448, | |
| "learning_rate": 4.0784313725490194e-07, | |
| "loss": -0.0, | |
| "num_tokens": 23737312.0, | |
| "reward": 0.6437839865684509, | |
| "reward_std": 0.2132418155670166, | |
| "rewards/rna_reward_fn/mean": 0.6437839865684509, | |
| "rewards/rna_reward_fn/std": 0.3476622402667999, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 140.75, | |
| "completions/mean_terminated_length": 140.75, | |
| "completions/min_length": 30.0, | |
| "completions/min_terminated_length": 30.0, | |
| "entropy": 0.14729295670986176, | |
| "epoch": 1.8, | |
| "frac_reward_zero_std": 0.03125, | |
| "grad_norm": 0.48154816031455994, | |
| "learning_rate": 4.03921568627451e-07, | |
| "loss": -0.0, | |
| "num_tokens": 23882464.0, | |
| "reward": 0.6620033979415894, | |
| "reward_std": 0.22405345737934113, | |
| "rewards/rna_reward_fn/mean": 0.6620033979415894, | |
| "rewards/rna_reward_fn/std": 0.3390491306781769, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 166.46875, | |
| "completions/mean_terminated_length": 166.46875, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "entropy": 0.14903101325035095, | |
| "epoch": 1.811764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.625751793384552, | |
| "learning_rate": 4e-07, | |
| "loss": -0.0, | |
| "num_tokens": 24053952.0, | |
| "reward": 0.6442551612854004, | |
| "reward_std": 0.17395520210266113, | |
| "rewards/rna_reward_fn/mean": 0.6442551612854004, | |
| "rewards/rna_reward_fn/std": 0.3670194745063782, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 157.90625, | |
| "completions/mean_terminated_length": 157.90625, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.15323904901742935, | |
| "epoch": 1.8235294117647058, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.48200494050979614, | |
| "learning_rate": 3.96078431372549e-07, | |
| "loss": -0.0, | |
| "num_tokens": 24216672.0, | |
| "reward": 0.6359031200408936, | |
| "reward_std": 0.17717690765857697, | |
| "rewards/rna_reward_fn/mean": 0.6359031200408936, | |
| "rewards/rna_reward_fn/std": 0.32817214727401733, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 472.0, | |
| "completions/max_terminated_length": 472.0, | |
| "completions/mean_length": 145.8125, | |
| "completions/mean_terminated_length": 145.8125, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.1613752394914627, | |
| "epoch": 1.835294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.46832966804504395, | |
| "learning_rate": 3.92156862745098e-07, | |
| "loss": 0.0, | |
| "num_tokens": 24367008.0, | |
| "reward": 0.7130154371261597, | |
| "reward_std": 0.18193909525871277, | |
| "rewards/rna_reward_fn/mean": 0.7130154371261597, | |
| "rewards/rna_reward_fn/std": 0.3411928117275238, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 142.46875, | |
| "completions/mean_terminated_length": 142.46875, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "entropy": 0.13961906731128693, | |
| "epoch": 1.8470588235294119, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6261844038963318, | |
| "learning_rate": 3.8823529411764707e-07, | |
| "loss": -0.0, | |
| "num_tokens": 24513920.0, | |
| "reward": 0.711245596408844, | |
| "reward_std": 0.1767653077840805, | |
| "rewards/rna_reward_fn/mean": 0.7112456560134888, | |
| "rewards/rna_reward_fn/std": 0.3348366618156433, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 412.0, | |
| "completions/max_terminated_length": 412.0, | |
| "completions/mean_length": 152.4375, | |
| "completions/mean_terminated_length": 152.4375, | |
| "completions/min_length": 39.0, | |
| "completions/min_terminated_length": 39.0, | |
| "entropy": 0.1567898690700531, | |
| "epoch": 1.8588235294117648, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5200847387313843, | |
| "learning_rate": 3.8431372549019606e-07, | |
| "loss": -0.0, | |
| "num_tokens": 24671040.0, | |
| "reward": 0.7147434949874878, | |
| "reward_std": 0.14905846118927002, | |
| "rewards/rna_reward_fn/mean": 0.7147434949874878, | |
| "rewards/rna_reward_fn/std": 0.3070945739746094, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 392.0, | |
| "completions/max_terminated_length": 392.0, | |
| "completions/mean_length": 125.71875, | |
| "completions/mean_terminated_length": 125.71875, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.133110411465168, | |
| "epoch": 1.8705882352941177, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4239906370639801, | |
| "learning_rate": 3.8039215686274506e-07, | |
| "loss": 0.0, | |
| "num_tokens": 24800800.0, | |
| "reward": 0.640139639377594, | |
| "reward_std": 0.20033451914787292, | |
| "rewards/rna_reward_fn/mean": 0.640139639377594, | |
| "rewards/rna_reward_fn/std": 0.3294910490512848, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 376.0, | |
| "completions/max_terminated_length": 376.0, | |
| "completions/mean_length": 134.8125, | |
| "completions/mean_terminated_length": 134.8125, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "entropy": 0.12187084183096886, | |
| "epoch": 1.8823529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.38697147369384766, | |
| "learning_rate": 3.764705882352941e-07, | |
| "loss": -0.0, | |
| "num_tokens": 24939872.0, | |
| "reward": 0.6659330725669861, | |
| "reward_std": 0.16438628733158112, | |
| "rewards/rna_reward_fn/mean": 0.6659330725669861, | |
| "rewards/rna_reward_fn/std": 0.35713815689086914, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 135.5625, | |
| "completions/mean_terminated_length": 135.5625, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "entropy": 0.13703680038452148, | |
| "epoch": 1.8941176470588235, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4564237594604492, | |
| "learning_rate": 3.7254901960784315e-07, | |
| "loss": 0.0, | |
| "num_tokens": 25079712.0, | |
| "reward": 0.6596216559410095, | |
| "reward_std": 0.20437049865722656, | |
| "rewards/rna_reward_fn/mean": 0.6596216559410095, | |
| "rewards/rna_reward_fn/std": 0.3517865240573883, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 177.0625, | |
| "completions/mean_terminated_length": 177.0625, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.15036547183990479, | |
| "epoch": 1.9058823529411764, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.45348137617111206, | |
| "learning_rate": 3.6862745098039214e-07, | |
| "loss": -0.0, | |
| "num_tokens": 25262048.0, | |
| "reward": 0.6836435198783875, | |
| "reward_std": 0.20624709129333496, | |
| "rewards/rna_reward_fn/mean": 0.6836435198783875, | |
| "rewards/rna_reward_fn/std": 0.32797813415527344, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 141.71875, | |
| "completions/mean_terminated_length": 141.71875, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.14257021248340607, | |
| "epoch": 1.9176470588235293, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4581199586391449, | |
| "learning_rate": 3.6470588235294114e-07, | |
| "loss": -0.0, | |
| "num_tokens": 25408192.0, | |
| "reward": 0.6231480836868286, | |
| "reward_std": 0.20732316374778748, | |
| "rewards/rna_reward_fn/mean": 0.6231480836868286, | |
| "rewards/rna_reward_fn/std": 0.35448968410491943, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 389.0, | |
| "completions/max_terminated_length": 389.0, | |
| "completions/mean_length": 103.90625, | |
| "completions/mean_terminated_length": 103.90625, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.11931119486689568, | |
| "epoch": 1.9294117647058824, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.42869991064071655, | |
| "learning_rate": 3.607843137254902e-07, | |
| "loss": -0.0, | |
| "num_tokens": 25515616.0, | |
| "reward": 0.7718137502670288, | |
| "reward_std": 0.15544265508651733, | |
| "rewards/rna_reward_fn/mean": 0.7718137502670288, | |
| "rewards/rna_reward_fn/std": 0.2820202112197876, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 399.0, | |
| "completions/max_terminated_length": 399.0, | |
| "completions/mean_length": 118.34375, | |
| "completions/mean_terminated_length": 118.34375, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.13630840182304382, | |
| "epoch": 1.9411764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4796566069126129, | |
| "learning_rate": 3.5686274509803923e-07, | |
| "loss": 0.0, | |
| "num_tokens": 25637824.0, | |
| "reward": 0.7639800310134888, | |
| "reward_std": 0.16217514872550964, | |
| "rewards/rna_reward_fn/mean": 0.7639800310134888, | |
| "rewards/rna_reward_fn/std": 0.2800072729587555, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 492.0, | |
| "completions/max_terminated_length": 492.0, | |
| "completions/mean_length": 196.1875, | |
| "completions/mean_terminated_length": 196.1875, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.1692701205611229, | |
| "epoch": 1.9529411764705882, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.576678991317749, | |
| "learning_rate": 3.529411764705882e-07, | |
| "loss": 0.0, | |
| "num_tokens": 25839744.0, | |
| "reward": 0.62703537940979, | |
| "reward_std": 0.24643635749816895, | |
| "rewards/rna_reward_fn/mean": 0.62703537940979, | |
| "rewards/rna_reward_fn/std": 0.3669246435165405, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 167.96875, | |
| "completions/mean_terminated_length": 167.96875, | |
| "completions/min_length": 32.0, | |
| "completions/min_terminated_length": 32.0, | |
| "entropy": 0.16024480760097504, | |
| "epoch": 1.9647058823529413, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7311699390411377, | |
| "learning_rate": 3.490196078431372e-07, | |
| "loss": 0.0, | |
| "num_tokens": 26012768.0, | |
| "reward": 0.6588948369026184, | |
| "reward_std": 0.1576000452041626, | |
| "rewards/rna_reward_fn/mean": 0.6588948965072632, | |
| "rewards/rna_reward_fn/std": 0.32907265424728394, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 201.5, | |
| "completions/mean_terminated_length": 201.5, | |
| "completions/min_length": 48.0, | |
| "completions/min_terminated_length": 48.0, | |
| "entropy": 0.1511036530137062, | |
| "epoch": 1.9764705882352942, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4694945216178894, | |
| "learning_rate": 3.4509803921568627e-07, | |
| "loss": 0.0, | |
| "num_tokens": 26220128.0, | |
| "reward": 0.6976197957992554, | |
| "reward_std": 0.19369524717330933, | |
| "rewards/rna_reward_fn/mean": 0.6976197957992554, | |
| "rewards/rna_reward_fn/std": 0.32611048221588135, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 400.0, | |
| "completions/max_terminated_length": 400.0, | |
| "completions/mean_length": 154.5, | |
| "completions/mean_terminated_length": 154.5, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.15085221827030182, | |
| "epoch": 1.988235294117647, | |
| "frac_reward_zero_std": 0.03125, | |
| "grad_norm": 0.7034254670143127, | |
| "learning_rate": 3.411764705882353e-07, | |
| "loss": 0.0, | |
| "num_tokens": 26379360.0, | |
| "reward": 0.6942508220672607, | |
| "reward_std": 0.20178331434726715, | |
| "rewards/rna_reward_fn/mean": 0.6942508220672607, | |
| "rewards/rna_reward_fn/std": 0.31030499935150146, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 490.0, | |
| "completions/max_terminated_length": 490.0, | |
| "completions/mean_length": 160.53125, | |
| "completions/mean_terminated_length": 160.53125, | |
| "completions/min_length": 31.0, | |
| "completions/min_terminated_length": 31.0, | |
| "entropy": 0.15548591315746307, | |
| "epoch": 2.0, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5434289574623108, | |
| "learning_rate": 3.372549019607843e-07, | |
| "loss": -0.0, | |
| "num_tokens": 26544768.0, | |
| "reward": 0.6601583957672119, | |
| "reward_std": 0.15550854802131653, | |
| "rewards/rna_reward_fn/mean": 0.6601583361625671, | |
| "rewards/rna_reward_fn/std": 0.3311554193496704, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 160.40625, | |
| "completions/mean_terminated_length": 160.40625, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "entropy": 0.1544594094157219, | |
| "epoch": 2.011764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6815203428268433, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": 0.0, | |
| "num_tokens": 26710048.0, | |
| "reward": 0.5972940921783447, | |
| "reward_std": 0.18555977940559387, | |
| "rewards/rna_reward_fn/mean": 0.5972940921783447, | |
| "rewards/rna_reward_fn/std": 0.36445632576942444, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 157.40625, | |
| "completions/mean_terminated_length": 157.40625, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "entropy": 0.14051128178834915, | |
| "epoch": 2.023529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5093562602996826, | |
| "learning_rate": 3.2941176470588235e-07, | |
| "loss": 0.0, | |
| "num_tokens": 26872256.0, | |
| "reward": 0.6649138927459717, | |
| "reward_std": 0.2001783400774002, | |
| "rewards/rna_reward_fn/mean": 0.6649138331413269, | |
| "rewards/rna_reward_fn/std": 0.3582386374473572, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 173.75, | |
| "completions/mean_terminated_length": 173.75, | |
| "completions/min_length": 39.0, | |
| "completions/min_terminated_length": 39.0, | |
| "entropy": 0.14279819279909134, | |
| "epoch": 2.0352941176470587, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4454724192619324, | |
| "learning_rate": 3.2549019607843134e-07, | |
| "loss": -0.0, | |
| "num_tokens": 27051200.0, | |
| "reward": 0.7748029828071594, | |
| "reward_std": 0.14138856530189514, | |
| "rewards/rna_reward_fn/mean": 0.7748030424118042, | |
| "rewards/rna_reward_fn/std": 0.2777082026004791, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 165.8125, | |
| "completions/mean_terminated_length": 165.8125, | |
| "completions/min_length": 38.0, | |
| "completions/min_terminated_length": 38.0, | |
| "entropy": 0.13190212100744247, | |
| "epoch": 2.0470588235294116, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4604037404060364, | |
| "learning_rate": 3.215686274509804e-07, | |
| "loss": 0.0, | |
| "num_tokens": 27222016.0, | |
| "reward": 0.6792135238647461, | |
| "reward_std": 0.17050443589687347, | |
| "rewards/rna_reward_fn/mean": 0.6792135834693909, | |
| "rewards/rna_reward_fn/std": 0.3469991087913513, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 140.21875, | |
| "completions/mean_terminated_length": 140.21875, | |
| "completions/min_length": 30.0, | |
| "completions/min_terminated_length": 30.0, | |
| "entropy": 0.11882514134049416, | |
| "epoch": 2.0588235294117645, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.42415928840637207, | |
| "learning_rate": 3.176470588235294e-07, | |
| "loss": -0.0, | |
| "num_tokens": 27366624.0, | |
| "reward": 0.618835985660553, | |
| "reward_std": 0.19730809330940247, | |
| "rewards/rna_reward_fn/mean": 0.6188360452651978, | |
| "rewards/rna_reward_fn/std": 0.3514353334903717, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 154.25, | |
| "completions/mean_terminated_length": 154.25, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.12727607041597366, | |
| "epoch": 2.070588235294118, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5830354690551758, | |
| "learning_rate": 3.1372549019607843e-07, | |
| "loss": 0.0, | |
| "num_tokens": 27525600.0, | |
| "reward": 0.6785444617271423, | |
| "reward_std": 0.18948182463645935, | |
| "rewards/rna_reward_fn/mean": 0.6785444617271423, | |
| "rewards/rna_reward_fn/std": 0.3351566791534424, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 147.78125, | |
| "completions/mean_terminated_length": 147.78125, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "entropy": 0.14719800651073456, | |
| "epoch": 2.0823529411764707, | |
| "frac_reward_zero_std": 0.03125, | |
| "grad_norm": 0.4794676899909973, | |
| "learning_rate": 3.098039215686274e-07, | |
| "loss": 0.0, | |
| "num_tokens": 27677952.0, | |
| "reward": 0.7077100276947021, | |
| "reward_std": 0.1931176781654358, | |
| "rewards/rna_reward_fn/mean": 0.7077100276947021, | |
| "rewards/rna_reward_fn/std": 0.3137640357017517, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 142.46875, | |
| "completions/mean_terminated_length": 142.46875, | |
| "completions/min_length": 16.0, | |
| "completions/min_terminated_length": 16.0, | |
| "entropy": 0.15307611972093582, | |
| "epoch": 2.0941176470588236, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6268736720085144, | |
| "learning_rate": 3.0588235294117647e-07, | |
| "loss": 0.0, | |
| "num_tokens": 27824864.0, | |
| "reward": 0.7079458236694336, | |
| "reward_std": 0.2219894826412201, | |
| "rewards/rna_reward_fn/mean": 0.7079458236694336, | |
| "rewards/rna_reward_fn/std": 0.3472329080104828, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 164.0, | |
| "completions/mean_terminated_length": 164.0, | |
| "completions/min_length": 36.0, | |
| "completions/min_terminated_length": 36.0, | |
| "entropy": 0.13749201595783234, | |
| "epoch": 2.1058823529411765, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5293802618980408, | |
| "learning_rate": 3.0196078431372546e-07, | |
| "loss": 0.0, | |
| "num_tokens": 27993824.0, | |
| "reward": 0.6385776996612549, | |
| "reward_std": 0.2456386685371399, | |
| "rewards/rna_reward_fn/mean": 0.6385776996612549, | |
| "rewards/rna_reward_fn/std": 0.36081886291503906, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 140.21875, | |
| "completions/mean_terminated_length": 140.21875, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.1387496143579483, | |
| "epoch": 2.1176470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.538530707359314, | |
| "learning_rate": 2.980392156862745e-07, | |
| "loss": -0.0, | |
| "num_tokens": 28138432.0, | |
| "reward": 0.6739398241043091, | |
| "reward_std": 0.21720820665359497, | |
| "rewards/rna_reward_fn/mean": 0.6739398837089539, | |
| "rewards/rna_reward_fn/std": 0.30697187781333923, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 118.65625, | |
| "completions/mean_terminated_length": 118.65625, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.11488081514835358, | |
| "epoch": 2.1294117647058823, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.42285630106925964, | |
| "learning_rate": 2.941176470588235e-07, | |
| "loss": -0.0, | |
| "num_tokens": 28260960.0, | |
| "reward": 0.7317262887954712, | |
| "reward_std": 0.20456328988075256, | |
| "rewards/rna_reward_fn/mean": 0.7317262887954712, | |
| "rewards/rna_reward_fn/std": 0.2935360074043274, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 384.0, | |
| "completions/max_terminated_length": 384.0, | |
| "completions/mean_length": 128.8125, | |
| "completions/mean_terminated_length": 128.8125, | |
| "completions/min_length": 31.0, | |
| "completions/min_terminated_length": 31.0, | |
| "entropy": 0.13038966059684753, | |
| "epoch": 2.1411764705882352, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.43837785720825195, | |
| "learning_rate": 2.9019607843137255e-07, | |
| "loss": 0.0, | |
| "num_tokens": 28393888.0, | |
| "reward": 0.7334122657775879, | |
| "reward_std": 0.1874283403158188, | |
| "rewards/rna_reward_fn/mean": 0.7334122657775879, | |
| "rewards/rna_reward_fn/std": 0.3205217123031616, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 142.1875, | |
| "completions/mean_terminated_length": 142.1875, | |
| "completions/min_length": 28.0, | |
| "completions/min_terminated_length": 28.0, | |
| "entropy": 0.142289437353611, | |
| "epoch": 2.152941176470588, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4689069092273712, | |
| "learning_rate": 2.8627450980392154e-07, | |
| "loss": -0.0, | |
| "num_tokens": 28540512.0, | |
| "reward": 0.738664448261261, | |
| "reward_std": 0.16794101893901825, | |
| "rewards/rna_reward_fn/mean": 0.7386645078659058, | |
| "rewards/rna_reward_fn/std": 0.30475351214408875, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 150.1875, | |
| "completions/mean_terminated_length": 150.1875, | |
| "completions/min_length": 17.0, | |
| "completions/min_terminated_length": 17.0, | |
| "entropy": 0.13591318577528, | |
| "epoch": 2.164705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.48003292083740234, | |
| "learning_rate": 2.823529411764706e-07, | |
| "loss": -0.0, | |
| "num_tokens": 28695328.0, | |
| "reward": 0.6993162631988525, | |
| "reward_std": 0.1979941427707672, | |
| "rewards/rna_reward_fn/mean": 0.6993162035942078, | |
| "rewards/rna_reward_fn/std": 0.31292685866355896, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 472.0, | |
| "completions/max_terminated_length": 472.0, | |
| "completions/mean_length": 173.65625, | |
| "completions/mean_terminated_length": 173.65625, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.15518562495708466, | |
| "epoch": 2.176470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6343421339988708, | |
| "learning_rate": 2.784313725490196e-07, | |
| "loss": -0.0, | |
| "num_tokens": 28874176.0, | |
| "reward": 0.7311723232269287, | |
| "reward_std": 0.2127300500869751, | |
| "rewards/rna_reward_fn/mean": 0.7311723232269287, | |
| "rewards/rna_reward_fn/std": 0.3124001622200012, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 137.5625, | |
| "completions/mean_terminated_length": 137.5625, | |
| "completions/min_length": 36.0, | |
| "completions/min_terminated_length": 36.0, | |
| "entropy": 0.1409146785736084, | |
| "epoch": 2.1882352941176473, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.46661409735679626, | |
| "learning_rate": 2.7450980392156863e-07, | |
| "loss": -0.0, | |
| "num_tokens": 29016064.0, | |
| "reward": 0.7118009328842163, | |
| "reward_std": 0.16496126353740692, | |
| "rewards/rna_reward_fn/mean": 0.7118009328842163, | |
| "rewards/rna_reward_fn/std": 0.32205572724342346, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 151.21875, | |
| "completions/mean_terminated_length": 151.21875, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.14989649504423141, | |
| "epoch": 2.2, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.44188031554222107, | |
| "learning_rate": 2.705882352941176e-07, | |
| "loss": -0.0, | |
| "num_tokens": 29171936.0, | |
| "reward": 0.7327808141708374, | |
| "reward_std": 0.17523989081382751, | |
| "rewards/rna_reward_fn/mean": 0.7327808141708374, | |
| "rewards/rna_reward_fn/std": 0.32806655764579773, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 389.0, | |
| "completions/max_terminated_length": 389.0, | |
| "completions/mean_length": 157.84375, | |
| "completions/mean_terminated_length": 157.84375, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "entropy": 0.14322884380817413, | |
| "epoch": 2.211764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5148700475692749, | |
| "learning_rate": 2.6666666666666667e-07, | |
| "loss": -0.0, | |
| "num_tokens": 29334592.0, | |
| "reward": 0.6917252540588379, | |
| "reward_std": 0.17680642008781433, | |
| "rewards/rna_reward_fn/mean": 0.6917252540588379, | |
| "rewards/rna_reward_fn/std": 0.30800244212150574, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 164.0, | |
| "completions/mean_terminated_length": 164.0, | |
| "completions/min_length": 16.0, | |
| "completions/min_terminated_length": 16.0, | |
| "entropy": 0.14842171967029572, | |
| "epoch": 2.223529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5274482369422913, | |
| "learning_rate": 2.6274509803921567e-07, | |
| "loss": 0.0, | |
| "num_tokens": 29503552.0, | |
| "reward": 0.7333264350891113, | |
| "reward_std": 0.17190617322921753, | |
| "rewards/rna_reward_fn/mean": 0.7333264350891113, | |
| "rewards/rna_reward_fn/std": 0.26974406838417053, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 167.875, | |
| "completions/mean_terminated_length": 167.875, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.12728291004896164, | |
| "epoch": 2.235294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4334995746612549, | |
| "learning_rate": 2.588235294117647e-07, | |
| "loss": -0.0, | |
| "num_tokens": 29676480.0, | |
| "reward": 0.6551768779754639, | |
| "reward_std": 0.18493275344371796, | |
| "rewards/rna_reward_fn/mean": 0.6551768779754639, | |
| "rewards/rna_reward_fn/std": 0.33756914734840393, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 142.59375, | |
| "completions/mean_terminated_length": 142.59375, | |
| "completions/min_length": 34.0, | |
| "completions/min_terminated_length": 34.0, | |
| "entropy": 0.13632921129465103, | |
| "epoch": 2.2470588235294118, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5202718377113342, | |
| "learning_rate": 2.549019607843137e-07, | |
| "loss": -0.0, | |
| "num_tokens": 29823520.0, | |
| "reward": 0.779222309589386, | |
| "reward_std": 0.1619720160961151, | |
| "rewards/rna_reward_fn/mean": 0.779222309589386, | |
| "rewards/rna_reward_fn/std": 0.255502849817276, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 381.0, | |
| "completions/max_terminated_length": 381.0, | |
| "completions/mean_length": 141.8125, | |
| "completions/mean_terminated_length": 141.8125, | |
| "completions/min_length": 34.0, | |
| "completions/min_terminated_length": 34.0, | |
| "entropy": 0.1468304842710495, | |
| "epoch": 2.2588235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4959217309951782, | |
| "learning_rate": 2.5098039215686275e-07, | |
| "loss": 0.0, | |
| "num_tokens": 29969760.0, | |
| "reward": 0.6328116655349731, | |
| "reward_std": 0.20429277420043945, | |
| "rewards/rna_reward_fn/mean": 0.6328116655349731, | |
| "rewards/rna_reward_fn/std": 0.3653068244457245, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 472.0, | |
| "completions/max_terminated_length": 472.0, | |
| "completions/mean_length": 147.03125, | |
| "completions/mean_terminated_length": 147.03125, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "entropy": 0.14507943391799927, | |
| "epoch": 2.2705882352941176, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.46249526739120483, | |
| "learning_rate": 2.4705882352941175e-07, | |
| "loss": -0.0, | |
| "num_tokens": 30121344.0, | |
| "reward": 0.6946768760681152, | |
| "reward_std": 0.16386722028255463, | |
| "rewards/rna_reward_fn/mean": 0.6946768760681152, | |
| "rewards/rna_reward_fn/std": 0.3166311979293823, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 347.0, | |
| "completions/max_terminated_length": 347.0, | |
| "completions/mean_length": 119.1875, | |
| "completions/mean_terminated_length": 119.1875, | |
| "completions/min_length": 34.0, | |
| "completions/min_terminated_length": 34.0, | |
| "entropy": 0.1289873719215393, | |
| "epoch": 2.2823529411764705, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.43832215666770935, | |
| "learning_rate": 2.431372549019608e-07, | |
| "loss": -0.0, | |
| "num_tokens": 30244416.0, | |
| "reward": 0.7309268116950989, | |
| "reward_std": 0.16351744532585144, | |
| "rewards/rna_reward_fn/mean": 0.7309267520904541, | |
| "rewards/rna_reward_fn/std": 0.27468279004096985, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 132.40625, | |
| "completions/mean_terminated_length": 132.40625, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "entropy": 0.14909712970256805, | |
| "epoch": 2.2941176470588234, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4866437613964081, | |
| "learning_rate": 2.392156862745098e-07, | |
| "loss": -0.0, | |
| "num_tokens": 30381024.0, | |
| "reward": 0.6669021844863892, | |
| "reward_std": 0.19414769113063812, | |
| "rewards/rna_reward_fn/mean": 0.6669021844863892, | |
| "rewards/rna_reward_fn/std": 0.3391817808151245, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 174.0, | |
| "completions/mean_terminated_length": 174.0, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.14798294007778168, | |
| "epoch": 2.3058823529411763, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.590640127658844, | |
| "learning_rate": 2.352941176470588e-07, | |
| "loss": -0.0, | |
| "num_tokens": 30560224.0, | |
| "reward": 0.6385676860809326, | |
| "reward_std": 0.20142759382724762, | |
| "rewards/rna_reward_fn/mean": 0.6385676860809326, | |
| "rewards/rna_reward_fn/std": 0.34272608160972595, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 125.125, | |
| "completions/mean_terminated_length": 125.125, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "entropy": 0.1469191089272499, | |
| "epoch": 2.317647058823529, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4721366763114929, | |
| "learning_rate": 2.3137254901960785e-07, | |
| "loss": -0.0, | |
| "num_tokens": 30689376.0, | |
| "reward": 0.7269188165664673, | |
| "reward_std": 0.19917072355747223, | |
| "rewards/rna_reward_fn/mean": 0.7269188165664673, | |
| "rewards/rna_reward_fn/std": 0.3235536217689514, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 170.21875, | |
| "completions/mean_terminated_length": 170.21875, | |
| "completions/min_length": 38.0, | |
| "completions/min_terminated_length": 38.0, | |
| "entropy": 0.1481616050004959, | |
| "epoch": 2.3294117647058825, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4824952483177185, | |
| "learning_rate": 2.2745098039215685e-07, | |
| "loss": 0.0, | |
| "num_tokens": 30864704.0, | |
| "reward": 0.7315170764923096, | |
| "reward_std": 0.19473856687545776, | |
| "rewards/rna_reward_fn/mean": 0.7315171360969543, | |
| "rewards/rna_reward_fn/std": 0.31163889169692993, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 124.21875, | |
| "completions/mean_terminated_length": 124.21875, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.11309440433979034, | |
| "epoch": 2.3411764705882354, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.43292057514190674, | |
| "learning_rate": 2.235294117647059e-07, | |
| "loss": -0.0, | |
| "num_tokens": 30992928.0, | |
| "reward": 0.6969711184501648, | |
| "reward_std": 0.18462812900543213, | |
| "rewards/rna_reward_fn/mean": 0.6969711780548096, | |
| "rewards/rna_reward_fn/std": 0.30229660868644714, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 295.0, | |
| "completions/max_terminated_length": 295.0, | |
| "completions/mean_length": 115.625, | |
| "completions/mean_terminated_length": 115.625, | |
| "completions/min_length": 28.0, | |
| "completions/min_terminated_length": 28.0, | |
| "entropy": 0.1170443557202816, | |
| "epoch": 2.3529411764705883, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.42780736088752747, | |
| "learning_rate": 2.196078431372549e-07, | |
| "loss": 0.0, | |
| "num_tokens": 31112352.0, | |
| "reward": 0.7397186160087585, | |
| "reward_std": 0.16325643658638, | |
| "rewards/rna_reward_fn/mean": 0.7397185564041138, | |
| "rewards/rna_reward_fn/std": 0.2868645191192627, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 191.78125, | |
| "completions/mean_terminated_length": 191.78125, | |
| "completions/min_length": 20.0, | |
| "completions/min_terminated_length": 20.0, | |
| "entropy": 0.158894345164299, | |
| "epoch": 2.364705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5923020243644714, | |
| "learning_rate": 2.156862745098039e-07, | |
| "loss": 0.0, | |
| "num_tokens": 31309760.0, | |
| "reward": 0.713019609451294, | |
| "reward_std": 0.1600976586341858, | |
| "rewards/rna_reward_fn/mean": 0.7130196690559387, | |
| "rewards/rna_reward_fn/std": 0.3151859641075134, | |
| "step": 201 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 500.0, | |
| "completions/max_terminated_length": 500.0, | |
| "completions/mean_length": 167.15625, | |
| "completions/mean_terminated_length": 167.15625, | |
| "completions/min_length": 38.0, | |
| "completions/min_terminated_length": 38.0, | |
| "entropy": 0.15573827922344208, | |
| "epoch": 2.376470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5989984273910522, | |
| "learning_rate": 2.1176470588235293e-07, | |
| "loss": -0.0, | |
| "num_tokens": 31481952.0, | |
| "reward": 0.7245238423347473, | |
| "reward_std": 0.21510586142539978, | |
| "rewards/rna_reward_fn/mean": 0.7245238423347473, | |
| "rewards/rna_reward_fn/std": 0.3133554756641388, | |
| "step": 202 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 336.0, | |
| "completions/max_terminated_length": 336.0, | |
| "completions/mean_length": 147.15625, | |
| "completions/mean_terminated_length": 147.15625, | |
| "completions/min_length": 37.0, | |
| "completions/min_terminated_length": 37.0, | |
| "entropy": 0.14043358713388443, | |
| "epoch": 2.388235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.45242446660995483, | |
| "learning_rate": 2.0784313725490195e-07, | |
| "loss": 0.0, | |
| "num_tokens": 31633664.0, | |
| "reward": 0.6685344576835632, | |
| "reward_std": 0.19693541526794434, | |
| "rewards/rna_reward_fn/mean": 0.6685344576835632, | |
| "rewards/rna_reward_fn/std": 0.33878231048583984, | |
| "step": 203 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 160.78125, | |
| "completions/mean_terminated_length": 160.78125, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.14151378720998764, | |
| "epoch": 2.4, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.578268826007843, | |
| "learning_rate": 2.0392156862745097e-07, | |
| "loss": 0.0, | |
| "num_tokens": 31799328.0, | |
| "reward": 0.753953218460083, | |
| "reward_std": 0.14072492718696594, | |
| "rewards/rna_reward_fn/mean": 0.753953218460083, | |
| "rewards/rna_reward_fn/std": 0.323638916015625, | |
| "step": 204 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 116.71875, | |
| "completions/mean_terminated_length": 116.71875, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.14078038185834885, | |
| "epoch": 2.411764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5669292211532593, | |
| "learning_rate": 2e-07, | |
| "loss": 0.0, | |
| "num_tokens": 31919872.0, | |
| "reward": 0.7278470993041992, | |
| "reward_std": 0.18851059675216675, | |
| "rewards/rna_reward_fn/mean": 0.7278470993041992, | |
| "rewards/rna_reward_fn/std": 0.31520187854766846, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 165.1875, | |
| "completions/mean_terminated_length": 165.1875, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.1560438796877861, | |
| "epoch": 2.4235294117647057, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5335204005241394, | |
| "learning_rate": 1.96078431372549e-07, | |
| "loss": -0.0, | |
| "num_tokens": 32090048.0, | |
| "reward": 0.74782395362854, | |
| "reward_std": 0.16413238644599915, | |
| "rewards/rna_reward_fn/mean": 0.74782395362854, | |
| "rewards/rna_reward_fn/std": 0.27966901659965515, | |
| "step": 206 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 129.75, | |
| "completions/mean_terminated_length": 129.75, | |
| "completions/min_length": 32.0, | |
| "completions/min_terminated_length": 32.0, | |
| "entropy": 0.13756585866212845, | |
| "epoch": 2.435294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4791547358036041, | |
| "learning_rate": 1.9215686274509803e-07, | |
| "loss": -0.0, | |
| "num_tokens": 32223936.0, | |
| "reward": 0.7443541884422302, | |
| "reward_std": 0.20347487926483154, | |
| "rewards/rna_reward_fn/mean": 0.744354248046875, | |
| "rewards/rna_reward_fn/std": 0.2934330999851227, | |
| "step": 207 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 144.46875, | |
| "completions/mean_terminated_length": 144.46875, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.14090368151664734, | |
| "epoch": 2.447058823529412, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.48767152428627014, | |
| "learning_rate": 1.8823529411764705e-07, | |
| "loss": -0.0, | |
| "num_tokens": 32372896.0, | |
| "reward": 0.7094341516494751, | |
| "reward_std": 0.1646713763475418, | |
| "rewards/rna_reward_fn/mean": 0.7094341516494751, | |
| "rewards/rna_reward_fn/std": 0.31243574619293213, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 121.375, | |
| "completions/mean_terminated_length": 121.375, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "entropy": 0.13812856376171112, | |
| "epoch": 2.458823529411765, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.43114832043647766, | |
| "learning_rate": 1.8431372549019607e-07, | |
| "loss": -0.0, | |
| "num_tokens": 32498208.0, | |
| "reward": 0.7636112570762634, | |
| "reward_std": 0.1354459822177887, | |
| "rewards/rna_reward_fn/mean": 0.7636112570762634, | |
| "rewards/rna_reward_fn/std": 0.2837965786457062, | |
| "step": 209 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 157.9375, | |
| "completions/mean_terminated_length": 157.9375, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.12325883284211159, | |
| "epoch": 2.4705882352941178, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7042959928512573, | |
| "learning_rate": 1.803921568627451e-07, | |
| "loss": -0.0, | |
| "num_tokens": 32660960.0, | |
| "reward": 0.685276985168457, | |
| "reward_std": 0.14444154500961304, | |
| "rewards/rna_reward_fn/mean": 0.685276985168457, | |
| "rewards/rna_reward_fn/std": 0.3264351785182953, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 398.0, | |
| "completions/max_terminated_length": 398.0, | |
| "completions/mean_length": 149.28125, | |
| "completions/mean_terminated_length": 149.28125, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.14060577005147934, | |
| "epoch": 2.4823529411764707, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7576245665550232, | |
| "learning_rate": 1.764705882352941e-07, | |
| "loss": 0.0, | |
| "num_tokens": 32814848.0, | |
| "reward": 0.7403950691223145, | |
| "reward_std": 0.19349028170108795, | |
| "rewards/rna_reward_fn/mean": 0.7403950691223145, | |
| "rewards/rna_reward_fn/std": 0.31960996985435486, | |
| "step": 211 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 140.09375, | |
| "completions/mean_terminated_length": 140.09375, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.128474622964859, | |
| "epoch": 2.4941176470588236, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4394446611404419, | |
| "learning_rate": 1.7254901960784313e-07, | |
| "loss": -0.0, | |
| "num_tokens": 32959328.0, | |
| "reward": 0.7468061447143555, | |
| "reward_std": 0.13857056200504303, | |
| "rewards/rna_reward_fn/mean": 0.7468062043190002, | |
| "rewards/rna_reward_fn/std": 0.2608503997325897, | |
| "step": 212 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 144.03125, | |
| "completions/mean_terminated_length": 144.03125, | |
| "completions/min_length": 34.0, | |
| "completions/min_terminated_length": 34.0, | |
| "entropy": 0.14114519208669662, | |
| "epoch": 2.5058823529411764, | |
| "frac_reward_zero_std": 0.03125, | |
| "grad_norm": 0.5121099352836609, | |
| "learning_rate": 1.6862745098039215e-07, | |
| "loss": 0.0, | |
| "num_tokens": 33107840.0, | |
| "reward": 0.6896160244941711, | |
| "reward_std": 0.17474885284900665, | |
| "rewards/rna_reward_fn/mean": 0.6896160244941711, | |
| "rewards/rna_reward_fn/std": 0.30136245489120483, | |
| "step": 213 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 196.625, | |
| "completions/mean_terminated_length": 196.625, | |
| "completions/min_length": 37.0, | |
| "completions/min_terminated_length": 37.0, | |
| "entropy": 0.1554037183523178, | |
| "epoch": 2.5176470588235293, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5231500864028931, | |
| "learning_rate": 1.6470588235294117e-07, | |
| "loss": 0.0, | |
| "num_tokens": 33310208.0, | |
| "reward": 0.7346584796905518, | |
| "reward_std": 0.20079070329666138, | |
| "rewards/rna_reward_fn/mean": 0.7346584796905518, | |
| "rewards/rna_reward_fn/std": 0.30361971259117126, | |
| "step": 214 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 138.9375, | |
| "completions/mean_terminated_length": 138.9375, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.12060126662254333, | |
| "epoch": 2.5294117647058822, | |
| "frac_reward_zero_std": 0.03125, | |
| "grad_norm": 0.45047426223754883, | |
| "learning_rate": 1.607843137254902e-07, | |
| "loss": 0.0, | |
| "num_tokens": 33453504.0, | |
| "reward": 0.768707275390625, | |
| "reward_std": 0.13694067299365997, | |
| "rewards/rna_reward_fn/mean": 0.7687073349952698, | |
| "rewards/rna_reward_fn/std": 0.27220436930656433, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 198.78125, | |
| "completions/mean_terminated_length": 198.78125, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "entropy": 0.1575038880109787, | |
| "epoch": 2.541176470588235, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5329861044883728, | |
| "learning_rate": 1.5686274509803921e-07, | |
| "loss": -0.0, | |
| "num_tokens": 33658080.0, | |
| "reward": 0.7541199922561646, | |
| "reward_std": 0.15449070930480957, | |
| "rewards/rna_reward_fn/mean": 0.7541199922561646, | |
| "rewards/rna_reward_fn/std": 0.2656092345714569, | |
| "step": 216 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 412.0, | |
| "completions/max_terminated_length": 412.0, | |
| "completions/mean_length": 158.34375, | |
| "completions/mean_terminated_length": 158.34375, | |
| "completions/min_length": 36.0, | |
| "completions/min_terminated_length": 36.0, | |
| "entropy": 0.15501223504543304, | |
| "epoch": 2.552941176470588, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.45992547273635864, | |
| "learning_rate": 1.5294117647058823e-07, | |
| "loss": 0.0, | |
| "num_tokens": 33821248.0, | |
| "reward": 0.7572486400604248, | |
| "reward_std": 0.15161246061325073, | |
| "rewards/rna_reward_fn/mean": 0.7572486400604248, | |
| "rewards/rna_reward_fn/std": 0.29167696833610535, | |
| "step": 217 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 169.625, | |
| "completions/mean_terminated_length": 169.625, | |
| "completions/min_length": 31.0, | |
| "completions/min_terminated_length": 31.0, | |
| "entropy": 0.13358986377716064, | |
| "epoch": 2.564705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.8965858817100525, | |
| "learning_rate": 1.4901960784313725e-07, | |
| "loss": -0.0, | |
| "num_tokens": 33995968.0, | |
| "reward": 0.7292990684509277, | |
| "reward_std": 0.16865938901901245, | |
| "rewards/rna_reward_fn/mean": 0.7292990684509277, | |
| "rewards/rna_reward_fn/std": 0.30115416646003723, | |
| "step": 218 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 176.78125, | |
| "completions/mean_terminated_length": 176.78125, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.13434413820505142, | |
| "epoch": 2.576470588235294, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.582165002822876, | |
| "learning_rate": 1.4509803921568628e-07, | |
| "loss": -0.0, | |
| "num_tokens": 34178016.0, | |
| "reward": 0.6599196195602417, | |
| "reward_std": 0.196761354804039, | |
| "rewards/rna_reward_fn/mean": 0.6599196791648865, | |
| "rewards/rna_reward_fn/std": 0.33999550342559814, | |
| "step": 219 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 156.1875, | |
| "completions/mean_terminated_length": 156.1875, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "entropy": 0.1357617899775505, | |
| "epoch": 2.588235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5189464688301086, | |
| "learning_rate": 1.411764705882353e-07, | |
| "loss": 0.0, | |
| "num_tokens": 34338976.0, | |
| "reward": 0.7549696564674377, | |
| "reward_std": 0.1326015144586563, | |
| "rewards/rna_reward_fn/mean": 0.7549696564674377, | |
| "rewards/rna_reward_fn/std": 0.2852962613105774, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 148.15625, | |
| "completions/mean_terminated_length": 148.15625, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "entropy": 0.15427181124687195, | |
| "epoch": 2.6, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.536194920539856, | |
| "learning_rate": 1.3725490196078432e-07, | |
| "loss": 0.0, | |
| "num_tokens": 34491712.0, | |
| "reward": 0.7131255865097046, | |
| "reward_std": 0.14100758731365204, | |
| "rewards/rna_reward_fn/mean": 0.7131255865097046, | |
| "rewards/rna_reward_fn/std": 0.31784212589263916, | |
| "step": 221 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 380.0, | |
| "completions/max_terminated_length": 380.0, | |
| "completions/mean_length": 145.1875, | |
| "completions/mean_terminated_length": 145.1875, | |
| "completions/min_length": 31.0, | |
| "completions/min_terminated_length": 31.0, | |
| "entropy": 0.13709458708763123, | |
| "epoch": 2.611764705882353, | |
| "frac_reward_zero_std": 0.03125, | |
| "grad_norm": 0.5712235569953918, | |
| "learning_rate": 1.3333333333333334e-07, | |
| "loss": 0.0, | |
| "num_tokens": 34641408.0, | |
| "reward": 0.7191460132598877, | |
| "reward_std": 0.16943207383155823, | |
| "rewards/rna_reward_fn/mean": 0.7191460132598877, | |
| "rewards/rna_reward_fn/std": 0.3015574514865875, | |
| "step": 222 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 145.1875, | |
| "completions/mean_terminated_length": 145.1875, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "entropy": 0.13566020876169205, | |
| "epoch": 2.623529411764706, | |
| "frac_reward_zero_std": 0.03125, | |
| "grad_norm": 0.4192090630531311, | |
| "learning_rate": 1.2941176470588236e-07, | |
| "loss": 0.0, | |
| "num_tokens": 34791104.0, | |
| "reward": 0.7555572986602783, | |
| "reward_std": 0.16786056756973267, | |
| "rewards/rna_reward_fn/mean": 0.7555572986602783, | |
| "rewards/rna_reward_fn/std": 0.2797638177871704, | |
| "step": 223 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 165.09375, | |
| "completions/mean_terminated_length": 165.09375, | |
| "completions/min_length": 41.0, | |
| "completions/min_terminated_length": 41.0, | |
| "entropy": 0.12663453072309494, | |
| "epoch": 2.635294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6057937145233154, | |
| "learning_rate": 1.2549019607843138e-07, | |
| "loss": -0.0, | |
| "num_tokens": 34961184.0, | |
| "reward": 0.6839346289634705, | |
| "reward_std": 0.19452279806137085, | |
| "rewards/rna_reward_fn/mean": 0.6839346289634705, | |
| "rewards/rna_reward_fn/std": 0.33146002888679504, | |
| "step": 224 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 167.65625, | |
| "completions/mean_terminated_length": 167.65625, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.1426771581172943, | |
| "epoch": 2.6470588235294117, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4763612747192383, | |
| "learning_rate": 1.215686274509804e-07, | |
| "loss": 0.0, | |
| "num_tokens": 35133888.0, | |
| "reward": 0.6619032621383667, | |
| "reward_std": 0.17893120646476746, | |
| "rewards/rna_reward_fn/mean": 0.6619032621383667, | |
| "rewards/rna_reward_fn/std": 0.3283209800720215, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 399.0, | |
| "completions/max_terminated_length": 399.0, | |
| "completions/mean_length": 149.9375, | |
| "completions/mean_terminated_length": 149.9375, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.14778528362512589, | |
| "epoch": 2.6588235294117646, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4169410169124603, | |
| "learning_rate": 1.176470588235294e-07, | |
| "loss": -0.0, | |
| "num_tokens": 35288448.0, | |
| "reward": 0.6732456088066101, | |
| "reward_std": 0.16452832520008087, | |
| "rewards/rna_reward_fn/mean": 0.6732455492019653, | |
| "rewards/rna_reward_fn/std": 0.3249601721763611, | |
| "step": 226 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 145.09375, | |
| "completions/mean_terminated_length": 145.09375, | |
| "completions/min_length": 30.0, | |
| "completions/min_terminated_length": 30.0, | |
| "entropy": 0.1449032723903656, | |
| "epoch": 2.6705882352941175, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6590065360069275, | |
| "learning_rate": 1.1372549019607842e-07, | |
| "loss": -0.0, | |
| "num_tokens": 35438048.0, | |
| "reward": 0.7874460220336914, | |
| "reward_std": 0.12049897015094757, | |
| "rewards/rna_reward_fn/mean": 0.7874460220336914, | |
| "rewards/rna_reward_fn/std": 0.2661431133747101, | |
| "step": 227 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 151.75, | |
| "completions/mean_terminated_length": 151.75, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.13789667189121246, | |
| "epoch": 2.682352941176471, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.501124918460846, | |
| "learning_rate": 1.0980392156862744e-07, | |
| "loss": -0.0, | |
| "num_tokens": 35594464.0, | |
| "reward": 0.76551353931427, | |
| "reward_std": 0.14058314263820648, | |
| "rewards/rna_reward_fn/mean": 0.7655135989189148, | |
| "rewards/rna_reward_fn/std": 0.2855876088142395, | |
| "step": 228 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 163.71875, | |
| "completions/mean_terminated_length": 163.71875, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "entropy": 0.14094559848308563, | |
| "epoch": 2.6941176470588237, | |
| "frac_reward_zero_std": 0.03125, | |
| "grad_norm": 0.736441433429718, | |
| "learning_rate": 1.0588235294117647e-07, | |
| "loss": 0.0, | |
| "num_tokens": 35763136.0, | |
| "reward": 0.6939565539360046, | |
| "reward_std": 0.16584208607673645, | |
| "rewards/rna_reward_fn/mean": 0.6939565539360046, | |
| "rewards/rna_reward_fn/std": 0.32086971402168274, | |
| "step": 229 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 139.78125, | |
| "completions/mean_terminated_length": 139.78125, | |
| "completions/min_length": 16.0, | |
| "completions/min_terminated_length": 16.0, | |
| "entropy": 0.13419293239712715, | |
| "epoch": 2.7058823529411766, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6264002919197083, | |
| "learning_rate": 1.0196078431372549e-07, | |
| "loss": -0.0, | |
| "num_tokens": 35907296.0, | |
| "reward": 0.7488532066345215, | |
| "reward_std": 0.1620199978351593, | |
| "rewards/rna_reward_fn/mean": 0.7488532066345215, | |
| "rewards/rna_reward_fn/std": 0.2980068624019623, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 397.0, | |
| "completions/max_terminated_length": 397.0, | |
| "completions/mean_length": 137.40625, | |
| "completions/mean_terminated_length": 137.40625, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.13055864721536636, | |
| "epoch": 2.7176470588235295, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4814888536930084, | |
| "learning_rate": 9.80392156862745e-08, | |
| "loss": 0.0, | |
| "num_tokens": 36049024.0, | |
| "reward": 0.6655980348587036, | |
| "reward_std": 0.15648490190505981, | |
| "rewards/rna_reward_fn/mean": 0.6655980348587036, | |
| "rewards/rna_reward_fn/std": 0.35470837354660034, | |
| "step": 231 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 130.90625, | |
| "completions/mean_terminated_length": 130.90625, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "entropy": 0.12380100041627884, | |
| "epoch": 2.7294117647058824, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.583757221698761, | |
| "learning_rate": 9.411764705882353e-08, | |
| "loss": -0.0, | |
| "num_tokens": 36184096.0, | |
| "reward": 0.7524540424346924, | |
| "reward_std": 0.15423446893692017, | |
| "rewards/rna_reward_fn/mean": 0.7524540424346924, | |
| "rewards/rna_reward_fn/std": 0.28454405069351196, | |
| "step": 232 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 399.0, | |
| "completions/max_terminated_length": 399.0, | |
| "completions/mean_length": 145.34375, | |
| "completions/mean_terminated_length": 145.34375, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "entropy": 0.1325184628367424, | |
| "epoch": 2.7411764705882353, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4390006959438324, | |
| "learning_rate": 9.019607843137255e-08, | |
| "loss": -0.0, | |
| "num_tokens": 36333952.0, | |
| "reward": 0.7277975082397461, | |
| "reward_std": 0.19573622941970825, | |
| "rewards/rna_reward_fn/mean": 0.7277975082397461, | |
| "rewards/rna_reward_fn/std": 0.32145431637763977, | |
| "step": 233 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 168.125, | |
| "completions/mean_terminated_length": 168.125, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "entropy": 0.13657083362340927, | |
| "epoch": 2.7529411764705882, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7681740522384644, | |
| "learning_rate": 8.627450980392157e-08, | |
| "loss": -0.0, | |
| "num_tokens": 36507136.0, | |
| "reward": 0.7168524265289307, | |
| "reward_std": 0.18613344430923462, | |
| "rewards/rna_reward_fn/mean": 0.7168524265289307, | |
| "rewards/rna_reward_fn/std": 0.3243979215621948, | |
| "step": 234 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 163.875, | |
| "completions/mean_terminated_length": 163.875, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.14333349466323853, | |
| "epoch": 2.764705882352941, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5657479763031006, | |
| "learning_rate": 8.235294117647059e-08, | |
| "loss": 0.0, | |
| "num_tokens": 36675968.0, | |
| "reward": 0.725771427154541, | |
| "reward_std": 0.16519448161125183, | |
| "rewards/rna_reward_fn/mean": 0.725771427154541, | |
| "rewards/rna_reward_fn/std": 0.29766252636909485, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 418.0, | |
| "completions/max_terminated_length": 418.0, | |
| "completions/mean_length": 156.46875, | |
| "completions/mean_terminated_length": 156.46875, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "entropy": 0.1441263109445572, | |
| "epoch": 2.776470588235294, | |
| "frac_reward_zero_std": 0.03125, | |
| "grad_norm": 0.4572143256664276, | |
| "learning_rate": 7.843137254901961e-08, | |
| "loss": 0.0, | |
| "num_tokens": 36837216.0, | |
| "reward": 0.742597222328186, | |
| "reward_std": 0.16114118695259094, | |
| "rewards/rna_reward_fn/mean": 0.742597222328186, | |
| "rewards/rna_reward_fn/std": 0.29970842599868774, | |
| "step": 236 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 396.0, | |
| "completions/max_terminated_length": 396.0, | |
| "completions/mean_length": 158.1875, | |
| "completions/mean_terminated_length": 158.1875, | |
| "completions/min_length": 39.0, | |
| "completions/min_terminated_length": 39.0, | |
| "entropy": 0.1409977823495865, | |
| "epoch": 2.788235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.42590776085853577, | |
| "learning_rate": 7.450980392156863e-08, | |
| "loss": -0.0, | |
| "num_tokens": 37000224.0, | |
| "reward": 0.7145720720291138, | |
| "reward_std": 0.164639413356781, | |
| "rewards/rna_reward_fn/mean": 0.7145720720291138, | |
| "rewards/rna_reward_fn/std": 0.3098330497741699, | |
| "step": 237 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 276.0, | |
| "completions/max_terminated_length": 276.0, | |
| "completions/mean_length": 107.21875, | |
| "completions/mean_terminated_length": 107.21875, | |
| "completions/min_length": 33.0, | |
| "completions/min_terminated_length": 33.0, | |
| "entropy": 0.11754556372761726, | |
| "epoch": 2.8, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4764781892299652, | |
| "learning_rate": 7.058823529411765e-08, | |
| "loss": 0.0, | |
| "num_tokens": 37111040.0, | |
| "reward": 0.7425558567047119, | |
| "reward_std": 0.16547845304012299, | |
| "rewards/rna_reward_fn/mean": 0.7425558567047119, | |
| "rewards/rna_reward_fn/std": 0.3051395118236542, | |
| "step": 238 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 492.0, | |
| "completions/max_terminated_length": 492.0, | |
| "completions/mean_length": 172.84375, | |
| "completions/mean_terminated_length": 172.84375, | |
| "completions/min_length": 42.0, | |
| "completions/min_terminated_length": 42.0, | |
| "entropy": 0.14019257575273514, | |
| "epoch": 2.8117647058823527, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5157439708709717, | |
| "learning_rate": 6.666666666666667e-08, | |
| "loss": -0.0, | |
| "num_tokens": 37289056.0, | |
| "reward": 0.6816315650939941, | |
| "reward_std": 0.2366928905248642, | |
| "rewards/rna_reward_fn/mean": 0.6816315650939941, | |
| "rewards/rna_reward_fn/std": 0.326466828584671, | |
| "step": 239 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 164.15625, | |
| "completions/mean_terminated_length": 164.15625, | |
| "completions/min_length": 37.0, | |
| "completions/min_terminated_length": 37.0, | |
| "entropy": 0.1466379389166832, | |
| "epoch": 2.8235294117647056, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5139991044998169, | |
| "learning_rate": 6.274509803921569e-08, | |
| "loss": 0.0, | |
| "num_tokens": 37458176.0, | |
| "reward": 0.7532614469528198, | |
| "reward_std": 0.1603999137878418, | |
| "rewards/rna_reward_fn/mean": 0.7532614469528198, | |
| "rewards/rna_reward_fn/std": 0.31244710087776184, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 157.15625, | |
| "completions/mean_terminated_length": 157.15625, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.12356984615325928, | |
| "epoch": 2.835294117647059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.9720450043678284, | |
| "learning_rate": 5.88235294117647e-08, | |
| "loss": -0.0, | |
| "num_tokens": 37620128.0, | |
| "reward": 0.7346148490905762, | |
| "reward_std": 0.15429024398326874, | |
| "rewards/rna_reward_fn/mean": 0.7346148490905762, | |
| "rewards/rna_reward_fn/std": 0.31154975295066833, | |
| "step": 241 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 153.5, | |
| "completions/mean_terminated_length": 153.5, | |
| "completions/min_length": 17.0, | |
| "completions/min_terminated_length": 17.0, | |
| "entropy": 0.1341606229543686, | |
| "epoch": 2.847058823529412, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5591171979904175, | |
| "learning_rate": 5.490196078431372e-08, | |
| "loss": -0.0, | |
| "num_tokens": 37778336.0, | |
| "reward": 0.7116289138793945, | |
| "reward_std": 0.21866443753242493, | |
| "rewards/rna_reward_fn/mean": 0.7116289138793945, | |
| "rewards/rna_reward_fn/std": 0.2980954051017761, | |
| "step": 242 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 482.0, | |
| "completions/max_terminated_length": 482.0, | |
| "completions/mean_length": 203.4375, | |
| "completions/mean_terminated_length": 203.4375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "entropy": 0.14845673739910126, | |
| "epoch": 2.8588235294117648, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5372319221496582, | |
| "learning_rate": 5.098039215686274e-08, | |
| "loss": 0.0, | |
| "num_tokens": 37987680.0, | |
| "reward": 0.7392944693565369, | |
| "reward_std": 0.19700977206230164, | |
| "rewards/rna_reward_fn/mean": 0.7392945289611816, | |
| "rewards/rna_reward_fn/std": 0.30940258502960205, | |
| "step": 243 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 139.6875, | |
| "completions/mean_terminated_length": 139.6875, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.13047143816947937, | |
| "epoch": 2.8705882352941177, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5987316370010376, | |
| "learning_rate": 4.705882352941176e-08, | |
| "loss": -0.0, | |
| "num_tokens": 38131744.0, | |
| "reward": 0.6977779269218445, | |
| "reward_std": 0.2151854932308197, | |
| "rewards/rna_reward_fn/mean": 0.6977779269218445, | |
| "rewards/rna_reward_fn/std": 0.3459690511226654, | |
| "step": 244 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 490.0, | |
| "completions/max_terminated_length": 490.0, | |
| "completions/mean_length": 148.40625, | |
| "completions/mean_terminated_length": 148.40625, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "entropy": 0.14810562878847122, | |
| "epoch": 2.8823529411764706, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7430775165557861, | |
| "learning_rate": 4.313725490196078e-08, | |
| "loss": -0.0, | |
| "num_tokens": 38284736.0, | |
| "reward": 0.6900802254676819, | |
| "reward_std": 0.18723735213279724, | |
| "rewards/rna_reward_fn/mean": 0.6900802254676819, | |
| "rewards/rna_reward_fn/std": 0.3328934609889984, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 139.5625, | |
| "completions/mean_terminated_length": 139.5625, | |
| "completions/min_length": 30.0, | |
| "completions/min_terminated_length": 30.0, | |
| "entropy": 0.12182106822729111, | |
| "epoch": 2.8941176470588235, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.49635204672813416, | |
| "learning_rate": 3.9215686274509804e-08, | |
| "loss": 0.0, | |
| "num_tokens": 38428672.0, | |
| "reward": 0.7072439193725586, | |
| "reward_std": 0.1840672791004181, | |
| "rewards/rna_reward_fn/mean": 0.7072439193725586, | |
| "rewards/rna_reward_fn/std": 0.3065541088581085, | |
| "step": 246 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 167.6875, | |
| "completions/mean_terminated_length": 167.6875, | |
| "completions/min_length": 36.0, | |
| "completions/min_terminated_length": 36.0, | |
| "entropy": 0.13815301656723022, | |
| "epoch": 2.9058823529411764, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.8550586104393005, | |
| "learning_rate": 3.5294117647058824e-08, | |
| "loss": -0.0, | |
| "num_tokens": 38601408.0, | |
| "reward": 0.7532185316085815, | |
| "reward_std": 0.1475568264722824, | |
| "rewards/rna_reward_fn/mean": 0.7532185316085815, | |
| "rewards/rna_reward_fn/std": 0.29489991068840027, | |
| "step": 247 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 340.0, | |
| "completions/max_terminated_length": 340.0, | |
| "completions/mean_length": 122.34375, | |
| "completions/mean_terminated_length": 122.34375, | |
| "completions/min_length": 29.0, | |
| "completions/min_terminated_length": 29.0, | |
| "entropy": 0.12259503453969955, | |
| "epoch": 2.9176470588235293, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.44689512252807617, | |
| "learning_rate": 3.1372549019607844e-08, | |
| "loss": 0.0, | |
| "num_tokens": 38727712.0, | |
| "reward": 0.7440149784088135, | |
| "reward_std": 0.1674138307571411, | |
| "rewards/rna_reward_fn/mean": 0.7440149188041687, | |
| "rewards/rna_reward_fn/std": 0.3040436804294586, | |
| "step": 248 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 474.0, | |
| "completions/max_terminated_length": 474.0, | |
| "completions/mean_length": 192.0, | |
| "completions/mean_terminated_length": 192.0, | |
| "completions/min_length": 42.0, | |
| "completions/min_terminated_length": 42.0, | |
| "entropy": 0.1282111555337906, | |
| "epoch": 2.9294117647058826, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5679563879966736, | |
| "learning_rate": 2.745098039215686e-08, | |
| "loss": 0.0, | |
| "num_tokens": 38925344.0, | |
| "reward": 0.6850175857543945, | |
| "reward_std": 0.19530020654201508, | |
| "rewards/rna_reward_fn/mean": 0.6850175857543945, | |
| "rewards/rna_reward_fn/std": 0.33921393752098083, | |
| "step": 249 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 117.09375, | |
| "completions/mean_terminated_length": 117.09375, | |
| "completions/min_length": 17.0, | |
| "completions/min_terminated_length": 17.0, | |
| "entropy": 0.12855321913957596, | |
| "epoch": 2.9411764705882355, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.505153238773346, | |
| "learning_rate": 2.352941176470588e-08, | |
| "loss": -0.0, | |
| "num_tokens": 39046272.0, | |
| "reward": 0.6269246339797974, | |
| "reward_std": 0.16829745471477509, | |
| "rewards/rna_reward_fn/mean": 0.6269246339797974, | |
| "rewards/rna_reward_fn/std": 0.33109787106513977, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 424.0, | |
| "completions/max_terminated_length": 424.0, | |
| "completions/mean_length": 121.0, | |
| "completions/mean_terminated_length": 121.0, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "entropy": 0.12059168517589569, | |
| "epoch": 2.9529411764705884, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.4366406500339508, | |
| "learning_rate": 1.9607843137254902e-08, | |
| "loss": 0.0, | |
| "num_tokens": 39171200.0, | |
| "reward": 0.7053718566894531, | |
| "reward_std": 0.14770260453224182, | |
| "rewards/rna_reward_fn/mean": 0.7053717970848083, | |
| "rewards/rna_reward_fn/std": 0.3234374523162842, | |
| "step": 251 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 327.0, | |
| "completions/max_terminated_length": 327.0, | |
| "completions/mean_length": 132.1875, | |
| "completions/mean_terminated_length": 132.1875, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "entropy": 0.13018939644098282, | |
| "epoch": 2.9647058823529413, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6731492280960083, | |
| "learning_rate": 1.5686274509803922e-08, | |
| "loss": 0.0, | |
| "num_tokens": 39307584.0, | |
| "reward": 0.7679715752601624, | |
| "reward_std": 0.17536047101020813, | |
| "rewards/rna_reward_fn/mean": 0.7679715156555176, | |
| "rewards/rna_reward_fn/std": 0.2801183760166168, | |
| "step": 252 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 145.40625, | |
| "completions/mean_terminated_length": 145.40625, | |
| "completions/min_length": 37.0, | |
| "completions/min_terminated_length": 37.0, | |
| "entropy": 0.10920717194676399, | |
| "epoch": 2.976470588235294, | |
| "frac_reward_zero_std": 0.03125, | |
| "grad_norm": 0.46245628595352173, | |
| "learning_rate": 1.176470588235294e-08, | |
| "loss": 0.0, | |
| "num_tokens": 39457504.0, | |
| "reward": 0.7559751272201538, | |
| "reward_std": 0.15144692361354828, | |
| "rewards/rna_reward_fn/mean": 0.7559751272201538, | |
| "rewards/rna_reward_fn/std": 0.3152746260166168, | |
| "step": 253 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 193.3125, | |
| "completions/mean_terminated_length": 193.3125, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "entropy": 0.15460387617349625, | |
| "epoch": 2.988235294117647, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.6124170422554016, | |
| "learning_rate": 7.843137254901961e-09, | |
| "loss": 0.0, | |
| "num_tokens": 39656480.0, | |
| "reward": 0.7068374752998352, | |
| "reward_std": 0.19490104913711548, | |
| "rewards/rna_reward_fn/mean": 0.7068374752998352, | |
| "rewards/rna_reward_fn/std": 0.310377836227417, | |
| "step": 254 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 401.0, | |
| "completions/max_terminated_length": 401.0, | |
| "completions/mean_length": 149.5, | |
| "completions/mean_terminated_length": 149.5, | |
| "completions/min_length": 36.0, | |
| "completions/min_terminated_length": 36.0, | |
| "entropy": 0.1327020823955536, | |
| "epoch": 3.0, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.5195903778076172, | |
| "learning_rate": 3.9215686274509805e-09, | |
| "loss": -0.0, | |
| "num_tokens": 39810592.0, | |
| "reward": 0.7493961453437805, | |
| "reward_std": 0.17497789859771729, | |
| "rewards/rna_reward_fn/mean": 0.7493961453437805, | |
| "rewards/rna_reward_fn/std": 0.31194695830345154, | |
| "step": 255 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 255, | |
| "num_input_tokens_seen": 39810592, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |