LLM-RNA-Design-2025 / model /SL+RL /trainer_state.json

Initial upload: model + data

482db35 verified about 2 months ago

242 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 3.0,
	"eval_steps": 500,
	"global_step": 255,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 378.0,
	"completions/max_terminated_length": 378.0,
	"completions/mean_length": 116.875,
	"completions/mean_terminated_length": 116.875,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.3960496634244919,
	"epoch": 0.011764705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.3658151626586914,
	"learning_rate": 1e-06,
	"loss": 0.0,
	"num_tokens": 120704.0,
	"reward": 0.42291906476020813,
	"reward_std": 0.353160560131073,
	"rewards/rna_reward_fn/mean": 0.42291906476020813,
	"rewards/rna_reward_fn/std": 0.39480823278427124,
	"step": 1
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 145.34375,
	"completions/mean_terminated_length": 145.34375,
	"completions/min_length": 32.0,
	"completions/min_terminated_length": 32.0,
	"entropy": 0.3918581157922745,
	"epoch": 0.023529411764705882,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.3561055362224579,
	"learning_rate": 9.96078431372549e-07,
	"loss": 0.0,
	"num_tokens": 270560.0,
	"reward": 0.4679465889930725,
	"reward_std": 0.304127037525177,
	"rewards/rna_reward_fn/mean": 0.4679465889930725,
	"rewards/rna_reward_fn/std": 0.37357842922210693,
	"step": 2
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 169.4375,
	"completions/mean_terminated_length": 169.4375,
	"completions/min_length": 16.0,
	"completions/min_terminated_length": 16.0,
	"entropy": 0.3528731167316437,
	"epoch": 0.03529411764705882,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.3573973476886749,
	"learning_rate": 9.92156862745098e-07,
	"loss": 0.0,
	"num_tokens": 445088.0,
	"reward": 0.4688035249710083,
	"reward_std": 0.3215726613998413,
	"rewards/rna_reward_fn/mean": 0.4688035249710083,
	"rewards/rna_reward_fn/std": 0.3945569097995758,
	"step": 3
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 164.53125,
	"completions/mean_terminated_length": 164.53125,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.3565346747636795,
	"epoch": 0.047058823529411764,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.37075310945510864,
	"learning_rate": 9.88235294117647e-07,
	"loss": -0.0,
	"num_tokens": 614592.0,
	"reward": 0.5333437323570251,
	"reward_std": 0.3202625513076782,
	"rewards/rna_reward_fn/mean": 0.5333437323570251,
	"rewards/rna_reward_fn/std": 0.3746815025806427,
	"step": 4
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 400.0,
	"completions/max_terminated_length": 400.0,
	"completions/mean_length": 103.3125,
	"completions/mean_terminated_length": 103.3125,
	"completions/min_length": 20.0,
	"completions/min_terminated_length": 20.0,
	"entropy": 0.35146908462047577,
	"epoch": 0.058823529411764705,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.34449008107185364,
	"learning_rate": 9.84313725490196e-07,
	"loss": -0.0,
	"num_tokens": 721408.0,
	"reward": 0.5266900062561035,
	"reward_std": 0.32159364223480225,
	"rewards/rna_reward_fn/mean": 0.5266900062561035,
	"rewards/rna_reward_fn/std": 0.3701845705509186,
	"step": 5
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 161.25,
	"completions/mean_terminated_length": 161.25,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.3309106081724167,
	"epoch": 0.07058823529411765,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.35763484239578247,
	"learning_rate": 9.80392156862745e-07,
	"loss": -0.0,
	"num_tokens": 887552.0,
	"reward": 0.5357265472412109,
	"reward_std": 0.2797412872314453,
	"rewards/rna_reward_fn/mean": 0.5357265472412109,
	"rewards/rna_reward_fn/std": 0.3577335476875305,
	"step": 6
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 151.375,
	"completions/mean_terminated_length": 151.375,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.34717176854610443,
	"epoch": 0.08235294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.3663802146911621,
	"learning_rate": 9.76470588235294e-07,
	"loss": -0.0,
	"num_tokens": 1043584.0,
	"reward": 0.547458291053772,
	"reward_std": 0.2995288372039795,
	"rewards/rna_reward_fn/mean": 0.547458291053772,
	"rewards/rna_reward_fn/std": 0.3604092001914978,
	"step": 7
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 482.0,
	"completions/max_terminated_length": 482.0,
	"completions/mean_length": 167.125,
	"completions/mean_terminated_length": 167.125,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.31340789794921875,
	"epoch": 0.09411764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4071066081523895,
	"learning_rate": 9.725490196078432e-07,
	"loss": -0.0,
	"num_tokens": 1215744.0,
	"reward": 0.5176310539245605,
	"reward_std": 0.3205966353416443,
	"rewards/rna_reward_fn/mean": 0.5176310539245605,
	"rewards/rna_reward_fn/std": 0.3642078638076782,
	"step": 8
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 151.59375,
	"completions/mean_terminated_length": 151.59375,
	"completions/min_length": 23.0,
	"completions/min_terminated_length": 23.0,
	"entropy": 0.305365189909935,
	"epoch": 0.10588235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.3989139795303345,
	"learning_rate": 9.686274509803921e-07,
	"loss": -0.0,
	"num_tokens": 1372000.0,
	"reward": 0.6008568406105042,
	"reward_std": 0.30818045139312744,
	"rewards/rna_reward_fn/mean": 0.6008569002151489,
	"rewards/rna_reward_fn/std": 0.35290631651878357,
	"step": 9
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 135.53125,
	"completions/mean_terminated_length": 135.53125,
	"completions/min_length": 18.0,
	"completions/min_terminated_length": 18.0,
	"entropy": 0.2962174266576767,
	"epoch": 0.11764705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.44642144441604614,
	"learning_rate": 9.64705882352941e-07,
	"loss": 0.0,
	"num_tokens": 1511808.0,
	"reward": 0.540717601776123,
	"reward_std": 0.3060719966888428,
	"rewards/rna_reward_fn/mean": 0.540717601776123,
	"rewards/rna_reward_fn/std": 0.36574023962020874,
	"step": 10
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 187.71875,
	"completions/mean_terminated_length": 187.71875,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.2934599667787552,
	"epoch": 0.12941176470588237,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.3814420700073242,
	"learning_rate": 9.607843137254902e-07,
	"loss": -0.0,
	"num_tokens": 1705056.0,
	"reward": 0.6084277629852295,
	"reward_std": 0.3016743063926697,
	"rewards/rna_reward_fn/mean": 0.6084277629852295,
	"rewards/rna_reward_fn/std": 0.37008586525917053,
	"step": 11
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 123.65625,
	"completions/mean_terminated_length": 123.65625,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.28613443672657013,
	"epoch": 0.1411764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.34958702325820923,
	"learning_rate": 9.568627450980392e-07,
	"loss": 0.0,
	"num_tokens": 1832704.0,
	"reward": 0.6017879247665405,
	"reward_std": 0.3006741404533386,
	"rewards/rna_reward_fn/mean": 0.6017879247665405,
	"rewards/rna_reward_fn/std": 0.35490649938583374,
	"step": 12
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 140.65625,
	"completions/mean_terminated_length": 140.65625,
	"completions/min_length": 24.0,
	"completions/min_terminated_length": 24.0,
	"entropy": 0.277506560087204,
	"epoch": 0.15294117647058825,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5352854132652283,
	"learning_rate": 9.529411764705881e-07,
	"loss": 0.0,
	"num_tokens": 1977760.0,
	"reward": 0.571915328502655,
	"reward_std": 0.2985040843486786,
	"rewards/rna_reward_fn/mean": 0.5719153881072998,
	"rewards/rna_reward_fn/std": 0.3767135441303253,
	"step": 13
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 424.0,
	"completions/max_terminated_length": 424.0,
	"completions/mean_length": 154.03125,
	"completions/mean_terminated_length": 154.03125,
	"completions/min_length": 18.0,
	"completions/min_terminated_length": 18.0,
	"entropy": 0.2907712608575821,
	"epoch": 0.16470588235294117,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.40935981273651123,
	"learning_rate": 9.490196078431371e-07,
	"loss": 0.0,
	"num_tokens": 2136512.0,
	"reward": 0.5937778353691101,
	"reward_std": 0.270163893699646,
	"rewards/rna_reward_fn/mean": 0.5937778353691101,
	"rewards/rna_reward_fn/std": 0.3509018123149872,
	"step": 14
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 184.40625,
	"completions/mean_terminated_length": 184.40625,
	"completions/min_length": 17.0,
	"completions/min_terminated_length": 17.0,
	"entropy": 0.27846619486808777,
	"epoch": 0.17647058823529413,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.41769424080848694,
	"learning_rate": 9.450980392156862e-07,
	"loss": 0.0,
	"num_tokens": 2326368.0,
	"reward": 0.6163018941879272,
	"reward_std": 0.26538053154945374,
	"rewards/rna_reward_fn/mean": 0.6163018941879272,
	"rewards/rna_reward_fn/std": 0.3496814966201782,
	"step": 15
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 389.0,
	"completions/max_terminated_length": 389.0,
	"completions/mean_length": 117.84375,
	"completions/mean_terminated_length": 117.84375,
	"completions/min_length": 36.0,
	"completions/min_terminated_length": 36.0,
	"entropy": 0.2604786157608032,
	"epoch": 0.18823529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.3845226764678955,
	"learning_rate": 9.411764705882352e-07,
	"loss": 0.0,
	"num_tokens": 2448064.0,
	"reward": 0.5925071239471436,
	"reward_std": 0.2943580150604248,
	"rewards/rna_reward_fn/mean": 0.5925071239471436,
	"rewards/rna_reward_fn/std": 0.3674796521663666,
	"step": 16
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 112.125,
	"completions/mean_terminated_length": 112.125,
	"completions/min_length": 19.0,
	"completions/min_terminated_length": 19.0,
	"entropy": 0.25712524354457855,
	"epoch": 0.2,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.39543959498405457,
	"learning_rate": 9.372549019607843e-07,
	"loss": -0.0,
	"num_tokens": 2563904.0,
	"reward": 0.5904660224914551,
	"reward_std": 0.26803961396217346,
	"rewards/rna_reward_fn/mean": 0.5904660224914551,
	"rewards/rna_reward_fn/std": 0.3583122193813324,
	"step": 17
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 138.40625,
	"completions/mean_terminated_length": 138.40625,
	"completions/min_length": 34.0,
	"completions/min_terminated_length": 34.0,
	"entropy": 0.27494488656520844,
	"epoch": 0.21176470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.32191383838653564,
	"learning_rate": 9.333333333333333e-07,
	"loss": -0.0,
	"num_tokens": 2706656.0,
	"reward": 0.6467701196670532,
	"reward_std": 0.2634694576263428,
	"rewards/rna_reward_fn/mean": 0.6467701196670532,
	"rewards/rna_reward_fn/std": 0.3313148319721222,
	"step": 18
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 137.6875,
	"completions/mean_terminated_length": 137.6875,
	"completions/min_length": 26.0,
	"completions/min_terminated_length": 26.0,
	"entropy": 0.260918065905571,
	"epoch": 0.2235294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4905475378036499,
	"learning_rate": 9.294117647058824e-07,
	"loss": 0.0,
	"num_tokens": 2848672.0,
	"reward": 0.5871793031692505,
	"reward_std": 0.25154006481170654,
	"rewards/rna_reward_fn/mean": 0.5871793031692505,
	"rewards/rna_reward_fn/std": 0.3587729334831238,
	"step": 19
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 166.78125,
	"completions/mean_terminated_length": 166.78125,
	"completions/min_length": 31.0,
	"completions/min_terminated_length": 31.0,
	"entropy": 0.26801037788391113,
	"epoch": 0.23529411764705882,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7330372929573059,
	"learning_rate": 9.254901960784314e-07,
	"loss": -0.0,
	"num_tokens": 3020480.0,
	"reward": 0.5460379123687744,
	"reward_std": 0.27695512771606445,
	"rewards/rna_reward_fn/mean": 0.5460379123687744,
	"rewards/rna_reward_fn/std": 0.37495046854019165,
	"step": 20
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 142.6875,
	"completions/mean_terminated_length": 142.6875,
	"completions/min_length": 39.0,
	"completions/min_terminated_length": 39.0,
	"entropy": 0.26508544385433197,
	"epoch": 0.24705882352941178,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4575193524360657,
	"learning_rate": 9.215686274509803e-07,
	"loss": 0.0,
	"num_tokens": 3167616.0,
	"reward": 0.6192805171012878,
	"reward_std": 0.2736813426017761,
	"rewards/rna_reward_fn/mean": 0.6192805171012878,
	"rewards/rna_reward_fn/std": 0.3539046049118042,
	"step": 21
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 154.25,
	"completions/mean_terminated_length": 154.25,
	"completions/min_length": 23.0,
	"completions/min_terminated_length": 23.0,
	"entropy": 0.25467583537101746,
	"epoch": 0.25882352941176473,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.407436341047287,
	"learning_rate": 9.176470588235293e-07,
	"loss": 0.0,
	"num_tokens": 3326592.0,
	"reward": 0.5778753757476807,
	"reward_std": 0.27449485659599304,
	"rewards/rna_reward_fn/mean": 0.5778753757476807,
	"rewards/rna_reward_fn/std": 0.3692671060562134,
	"step": 22
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 376.0,
	"completions/max_terminated_length": 376.0,
	"completions/mean_length": 135.46875,
	"completions/mean_terminated_length": 135.46875,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.23743800073862076,
	"epoch": 0.27058823529411763,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.36481839418411255,
	"learning_rate": 9.137254901960783e-07,
	"loss": -0.0,
	"num_tokens": 3466336.0,
	"reward": 0.6230462193489075,
	"reward_std": 0.27385085821151733,
	"rewards/rna_reward_fn/mean": 0.6230462193489075,
	"rewards/rna_reward_fn/std": 0.35384857654571533,
	"step": 23
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 399.0,
	"completions/max_terminated_length": 399.0,
	"completions/mean_length": 159.25,
	"completions/mean_terminated_length": 159.25,
	"completions/min_length": 39.0,
	"completions/min_terminated_length": 39.0,
	"entropy": 0.2592047303915024,
	"epoch": 0.2823529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.40386804938316345,
	"learning_rate": 9.098039215686274e-07,
	"loss": -0.0,
	"num_tokens": 3630432.0,
	"reward": 0.587247908115387,
	"reward_std": 0.26836222410202026,
	"rewards/rna_reward_fn/mean": 0.587247908115387,
	"rewards/rna_reward_fn/std": 0.3811717927455902,
	"step": 24
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 400.0,
	"completions/max_terminated_length": 400.0,
	"completions/mean_length": 152.375,
	"completions/mean_terminated_length": 152.375,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.23664871603250504,
	"epoch": 0.29411764705882354,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.514864444732666,
	"learning_rate": 9.058823529411764e-07,
	"loss": -0.0,
	"num_tokens": 3787488.0,
	"reward": 0.6044737696647644,
	"reward_std": 0.2556478679180145,
	"rewards/rna_reward_fn/mean": 0.6044737696647644,
	"rewards/rna_reward_fn/std": 0.3558889329433441,
	"step": 25
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 418.0,
	"completions/max_terminated_length": 418.0,
	"completions/mean_length": 140.5,
	"completions/mean_terminated_length": 140.5,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.2437874600291252,
	"epoch": 0.3058823529411765,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4290100038051605,
	"learning_rate": 9.019607843137255e-07,
	"loss": -0.0,
	"num_tokens": 3932384.0,
	"reward": 0.583857536315918,
	"reward_std": 0.2450568526983261,
	"rewards/rna_reward_fn/mean": 0.583857536315918,
	"rewards/rna_reward_fn/std": 0.3653680384159088,
	"step": 26
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 472.0,
	"completions/max_terminated_length": 472.0,
	"completions/mean_length": 164.8125,
	"completions/mean_terminated_length": 164.8125,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.24944818764925003,
	"epoch": 0.3176470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.42284926772117615,
	"learning_rate": 8.980392156862745e-07,
	"loss": -0.0,
	"num_tokens": 4102176.0,
	"reward": 0.5925735235214233,
	"reward_std": 0.2968187630176544,
	"rewards/rna_reward_fn/mean": 0.5925735235214233,
	"rewards/rna_reward_fn/std": 0.3608212471008301,
	"step": 27
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 397.0,
	"completions/max_terminated_length": 397.0,
	"completions/mean_length": 146.1875,
	"completions/mean_terminated_length": 146.1875,
	"completions/min_length": 24.0,
	"completions/min_terminated_length": 24.0,
	"entropy": 0.22080854326486588,
	"epoch": 0.32941176470588235,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4605961740016937,
	"learning_rate": 8.941176470588236e-07,
	"loss": 0.0,
	"num_tokens": 4252896.0,
	"reward": 0.5584173202514648,
	"reward_std": 0.2890748083591461,
	"rewards/rna_reward_fn/mean": 0.5584173202514648,
	"rewards/rna_reward_fn/std": 0.3958645462989807,
	"step": 28
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 175.90625,
	"completions/mean_terminated_length": 175.90625,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.2321019321680069,
	"epoch": 0.3411764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5582552552223206,
	"learning_rate": 8.901960784313724e-07,
	"loss": 0.0,
	"num_tokens": 4434048.0,
	"reward": 0.5966294407844543,
	"reward_std": 0.2823025584220886,
	"rewards/rna_reward_fn/mean": 0.5966294407844543,
	"rewards/rna_reward_fn/std": 0.3560717701911926,
	"step": 29
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 174.1875,
	"completions/mean_terminated_length": 174.1875,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.21510899811983109,
	"epoch": 0.35294117647058826,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.49061208963394165,
	"learning_rate": 8.862745098039215e-07,
	"loss": -0.0,
	"num_tokens": 4613440.0,
	"reward": 0.5848400592803955,
	"reward_std": 0.267974317073822,
	"rewards/rna_reward_fn/mean": 0.5848400592803955,
	"rewards/rna_reward_fn/std": 0.37775954604148865,
	"step": 30
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 490.0,
	"completions/max_terminated_length": 490.0,
	"completions/mean_length": 163.15625,
	"completions/mean_terminated_length": 163.15625,
	"completions/min_length": 32.0,
	"completions/min_terminated_length": 32.0,
	"entropy": 0.2507341653108597,
	"epoch": 0.36470588235294116,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.603717029094696,
	"learning_rate": 8.823529411764705e-07,
	"loss": 0.0,
	"num_tokens": 4781536.0,
	"reward": 0.6572607159614563,
	"reward_std": 0.2553848624229431,
	"rewards/rna_reward_fn/mean": 0.6572607159614563,
	"rewards/rna_reward_fn/std": 0.3443078398704529,
	"step": 31
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 170.34375,
	"completions/mean_terminated_length": 170.34375,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.2254045456647873,
	"epoch": 0.3764705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5129714608192444,
	"learning_rate": 8.784313725490196e-07,
	"loss": -0.0,
	"num_tokens": 4956992.0,
	"reward": 0.6237974762916565,
	"reward_std": 0.2781754732131958,
	"rewards/rna_reward_fn/mean": 0.6237974762916565,
	"rewards/rna_reward_fn/std": 0.37038782238960266,
	"step": 32
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 500.0,
	"completions/max_terminated_length": 500.0,
	"completions/mean_length": 140.96875,
	"completions/mean_terminated_length": 140.96875,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.23444515466690063,
	"epoch": 0.38823529411764707,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5718296766281128,
	"learning_rate": 8.745098039215686e-07,
	"loss": -0.0,
	"num_tokens": 5102368.0,
	"reward": 0.663845956325531,
	"reward_std": 0.23731249570846558,
	"rewards/rna_reward_fn/mean": 0.6638458967208862,
	"rewards/rna_reward_fn/std": 0.3386061191558838,
	"step": 33
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 426.0,
	"completions/max_terminated_length": 426.0,
	"completions/mean_length": 135.84375,
	"completions/mean_terminated_length": 135.84375,
	"completions/min_length": 52.0,
	"completions/min_terminated_length": 52.0,
	"entropy": 0.21551413834095,
	"epoch": 0.4,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.48484402894973755,
	"learning_rate": 8.705882352941177e-07,
	"loss": 0.0,
	"num_tokens": 5242496.0,
	"reward": 0.5733575224876404,
	"reward_std": 0.2985653281211853,
	"rewards/rna_reward_fn/mean": 0.5733575224876404,
	"rewards/rna_reward_fn/std": 0.3665997385978699,
	"step": 34
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 128.34375,
	"completions/mean_terminated_length": 128.34375,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.19232773780822754,
	"epoch": 0.4117647058823529,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.3821423351764679,
	"learning_rate": 8.666666666666667e-07,
	"loss": 0.0,
	"num_tokens": 5374944.0,
	"reward": 0.6459628939628601,
	"reward_std": 0.27456825971603394,
	"rewards/rna_reward_fn/mean": 0.6459628939628601,
	"rewards/rna_reward_fn/std": 0.3492187559604645,
	"step": 35
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 117.28125,
	"completions/mean_terminated_length": 117.28125,
	"completions/min_length": 16.0,
	"completions/min_terminated_length": 16.0,
	"entropy": 0.2170068845152855,
	"epoch": 0.4235294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.519378125667572,
	"learning_rate": 8.627450980392156e-07,
	"loss": -0.0,
	"num_tokens": 5496064.0,
	"reward": 0.6556386947631836,
	"reward_std": 0.2442726194858551,
	"rewards/rna_reward_fn/mean": 0.6556386947631836,
	"rewards/rna_reward_fn/std": 0.3574485182762146,
	"step": 36
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 143.75,
	"completions/mean_terminated_length": 143.75,
	"completions/min_length": 15.0,
	"completions/min_terminated_length": 15.0,
	"entropy": 0.23470622301101685,
	"epoch": 0.43529411764705883,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4268864691257477,
	"learning_rate": 8.588235294117646e-07,
	"loss": 0.0,
	"num_tokens": 5644288.0,
	"reward": 0.6998727917671204,
	"reward_std": 0.2536011040210724,
	"rewards/rna_reward_fn/mean": 0.6998728513717651,
	"rewards/rna_reward_fn/std": 0.34483227133750916,
	"step": 37
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 161.84375,
	"completions/mean_terminated_length": 161.84375,
	"completions/min_length": 38.0,
	"completions/min_terminated_length": 38.0,
	"entropy": 0.20661279559135437,
	"epoch": 0.4470588235294118,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.49551478028297424,
	"learning_rate": 8.549019607843136e-07,
	"loss": 0.0,
	"num_tokens": 5811040.0,
	"reward": 0.60715651512146,
	"reward_std": 0.2498263716697693,
	"rewards/rna_reward_fn/mean": 0.60715651512146,
	"rewards/rna_reward_fn/std": 0.3692743182182312,
	"step": 38
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 169.09375,
	"completions/mean_terminated_length": 169.09375,
	"completions/min_length": 38.0,
	"completions/min_terminated_length": 38.0,
	"entropy": 0.22686513513326645,
	"epoch": 0.4588235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.539655864238739,
	"learning_rate": 8.509803921568627e-07,
	"loss": 0.0,
	"num_tokens": 5985216.0,
	"reward": 0.606254518032074,
	"reward_std": 0.27362608909606934,
	"rewards/rna_reward_fn/mean": 0.606254518032074,
	"rewards/rna_reward_fn/std": 0.37834590673446655,
	"step": 39
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 158.625,
	"completions/mean_terminated_length": 158.625,
	"completions/min_length": 43.0,
	"completions/min_terminated_length": 43.0,
	"entropy": 0.20522872358560562,
	"epoch": 0.47058823529411764,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4427753686904907,
	"learning_rate": 8.470588235294117e-07,
	"loss": 0.0,
	"num_tokens": 6148672.0,
	"reward": 0.6244011521339417,
	"reward_std": 0.2686484158039093,
	"rewards/rna_reward_fn/mean": 0.6244011521339417,
	"rewards/rna_reward_fn/std": 0.3721536099910736,
	"step": 40
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 165.5,
	"completions/mean_terminated_length": 165.5,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.22500251233577728,
	"epoch": 0.4823529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.8924270272254944,
	"learning_rate": 8.431372549019608e-07,
	"loss": 0.0,
	"num_tokens": 6319168.0,
	"reward": 0.5321128368377686,
	"reward_std": 0.29077643156051636,
	"rewards/rna_reward_fn/mean": 0.5321128368377686,
	"rewards/rna_reward_fn/std": 0.3840348422527313,
	"step": 41
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 160.90625,
	"completions/mean_terminated_length": 160.90625,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.23232445865869522,
	"epoch": 0.49411764705882355,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4376697540283203,
	"learning_rate": 8.392156862745098e-07,
	"loss": 0.0,
	"num_tokens": 6484960.0,
	"reward": 0.6353960037231445,
	"reward_std": 0.2474566251039505,
	"rewards/rna_reward_fn/mean": 0.6353960037231445,
	"rewards/rna_reward_fn/std": 0.3577839136123657,
	"step": 42
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 156.9375,
	"completions/mean_terminated_length": 156.9375,
	"completions/min_length": 30.0,
	"completions/min_terminated_length": 30.0,
	"entropy": 0.21899814903736115,
	"epoch": 0.5058823529411764,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5491610765457153,
	"learning_rate": 8.352941176470589e-07,
	"loss": -0.0,
	"num_tokens": 6646688.0,
	"reward": 0.6090617775917053,
	"reward_std": 0.2399156093597412,
	"rewards/rna_reward_fn/mean": 0.6090618371963501,
	"rewards/rna_reward_fn/std": 0.35401132702827454,
	"step": 43
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 161.28125,
	"completions/mean_terminated_length": 161.28125,
	"completions/min_length": 43.0,
	"completions/min_terminated_length": 43.0,
	"entropy": 0.2018352746963501,
	"epoch": 0.5176470588235295,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4728248715400696,
	"learning_rate": 8.313725490196078e-07,
	"loss": -0.0,
	"num_tokens": 6812864.0,
	"reward": 0.5414500832557678,
	"reward_std": 0.257457435131073,
	"rewards/rna_reward_fn/mean": 0.5414501428604126,
	"rewards/rna_reward_fn/std": 0.37554678320884705,
	"step": 44
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 144.53125,
	"completions/mean_terminated_length": 144.53125,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.21590139716863632,
	"epoch": 0.5294117647058824,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.45613518357276917,
	"learning_rate": 8.274509803921567e-07,
	"loss": 0.0,
	"num_tokens": 6961888.0,
	"reward": 0.5840362310409546,
	"reward_std": 0.24920199811458588,
	"rewards/rna_reward_fn/mean": 0.5840362310409546,
	"rewards/rna_reward_fn/std": 0.3838988244533539,
	"step": 45
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 151.59375,
	"completions/mean_terminated_length": 151.59375,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.20446214824914932,
	"epoch": 0.5411764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4725431799888611,
	"learning_rate": 8.235294117647058e-07,
	"loss": 0.0,
	"num_tokens": 7118144.0,
	"reward": 0.5587388277053833,
	"reward_std": 0.25771480798721313,
	"rewards/rna_reward_fn/mean": 0.5587388277053833,
	"rewards/rna_reward_fn/std": 0.3881581127643585,
	"step": 46
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 472.0,
	"completions/max_terminated_length": 472.0,
	"completions/mean_length": 148.09375,
	"completions/mean_terminated_length": 148.09375,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.20715581625699997,
	"epoch": 0.5529411764705883,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5775709748268127,
	"learning_rate": 8.196078431372548e-07,
	"loss": -0.0,
	"num_tokens": 7270816.0,
	"reward": 0.6535854935646057,
	"reward_std": 0.23074793815612793,
	"rewards/rna_reward_fn/mean": 0.6535854339599609,
	"rewards/rna_reward_fn/std": 0.35560858249664307,
	"step": 47
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 148.25,
	"completions/mean_terminated_length": 148.25,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.20631568133831024,
	"epoch": 0.5647058823529412,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5872889161109924,
	"learning_rate": 8.156862745098039e-07,
	"loss": -0.0,
	"num_tokens": 7423648.0,
	"reward": 0.5795817375183105,
	"reward_std": 0.26122066378593445,
	"rewards/rna_reward_fn/mean": 0.5795817375183105,
	"rewards/rna_reward_fn/std": 0.3758288025856018,
	"step": 48
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 362.0,
	"completions/max_terminated_length": 362.0,
	"completions/mean_length": 124.71875,
	"completions/mean_terminated_length": 124.71875,
	"completions/min_length": 31.0,
	"completions/min_terminated_length": 31.0,
	"entropy": 0.19562938064336777,
	"epoch": 0.5764705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.450082391500473,
	"learning_rate": 8.117647058823529e-07,
	"loss": 0.0,
	"num_tokens": 7552384.0,
	"reward": 0.657599925994873,
	"reward_std": 0.24575895071029663,
	"rewards/rna_reward_fn/mean": 0.657599925994873,
	"rewards/rna_reward_fn/std": 0.31881189346313477,
	"step": 49
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 144.53125,
	"completions/mean_terminated_length": 144.53125,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.212866373360157,
	"epoch": 0.5882352941176471,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4694586992263794,
	"learning_rate": 8.07843137254902e-07,
	"loss": -0.0,
	"num_tokens": 7701408.0,
	"reward": 0.5784563422203064,
	"reward_std": 0.2643548846244812,
	"rewards/rna_reward_fn/mean": 0.5784563422203064,
	"rewards/rna_reward_fn/std": 0.3683941066265106,
	"step": 50
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 138.21875,
	"completions/mean_terminated_length": 138.21875,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.17988762259483337,
	"epoch": 0.6,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.44588983058929443,
	"learning_rate": 8.03921568627451e-07,
	"loss": 0.0,
	"num_tokens": 7843968.0,
	"reward": 0.6563807725906372,
	"reward_std": 0.2578202784061432,
	"rewards/rna_reward_fn/mean": 0.6563807725906372,
	"rewards/rna_reward_fn/std": 0.3404718339443207,
	"step": 51
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 181.0,
	"completions/mean_terminated_length": 181.0,
	"completions/min_length": 26.0,
	"completions/min_terminated_length": 26.0,
	"entropy": 0.22444826364517212,
	"epoch": 0.611764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.49978184700012207,
	"learning_rate": 8e-07,
	"loss": -0.0,
	"num_tokens": 8030336.0,
	"reward": 0.6426054239273071,
	"reward_std": 0.2517712712287903,
	"rewards/rna_reward_fn/mean": 0.6426054239273071,
	"rewards/rna_reward_fn/std": 0.3629717528820038,
	"step": 52
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 185.40625,
	"completions/mean_terminated_length": 185.40625,
	"completions/min_length": 30.0,
	"completions/min_terminated_length": 30.0,
	"entropy": 0.20722465217113495,
	"epoch": 0.6235294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6321276426315308,
	"learning_rate": 7.960784313725489e-07,
	"loss": -0.0,
	"num_tokens": 8221216.0,
	"reward": 0.7105848789215088,
	"reward_std": 0.23574814200401306,
	"rewards/rna_reward_fn/mean": 0.7105848789215088,
	"rewards/rna_reward_fn/std": 0.3385322690010071,
	"step": 53
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 358.0,
	"completions/max_terminated_length": 358.0,
	"completions/mean_length": 148.125,
	"completions/mean_terminated_length": 148.125,
	"completions/min_length": 30.0,
	"completions/min_terminated_length": 30.0,
	"entropy": 0.19676074385643005,
	"epoch": 0.6352941176470588,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.49395662546157837,
	"learning_rate": 7.92156862745098e-07,
	"loss": 0.0,
	"num_tokens": 8373920.0,
	"reward": 0.5770894885063171,
	"reward_std": 0.2644929885864258,
	"rewards/rna_reward_fn/mean": 0.5770894289016724,
	"rewards/rna_reward_fn/std": 0.3790797293186188,
	"step": 54
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 159.6875,
	"completions/mean_terminated_length": 159.6875,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.18705828487873077,
	"epoch": 0.6470588235294118,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4197390079498291,
	"learning_rate": 7.88235294117647e-07,
	"loss": 0.0,
	"num_tokens": 8538464.0,
	"reward": 0.5764464139938354,
	"reward_std": 0.21550722420215607,
	"rewards/rna_reward_fn/mean": 0.5764464139938354,
	"rewards/rna_reward_fn/std": 0.364503413438797,
	"step": 55
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 146.125,
	"completions/mean_terminated_length": 146.125,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.21118487417697906,
	"epoch": 0.6588235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.37146326899528503,
	"learning_rate": 7.84313725490196e-07,
	"loss": 0.0,
	"num_tokens": 8689120.0,
	"reward": 0.6104137897491455,
	"reward_std": 0.23754771053791046,
	"rewards/rna_reward_fn/mean": 0.6104137897491455,
	"rewards/rna_reward_fn/std": 0.3665221333503723,
	"step": 56
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 160.65625,
	"completions/mean_terminated_length": 160.65625,
	"completions/min_length": 41.0,
	"completions/min_terminated_length": 41.0,
	"entropy": 0.1945827156305313,
	"epoch": 0.6705882352941176,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4072308838367462,
	"learning_rate": 7.803921568627451e-07,
	"loss": 0.0,
	"num_tokens": 8854656.0,
	"reward": 0.6713041067123413,
	"reward_std": 0.2212895005941391,
	"rewards/rna_reward_fn/mean": 0.6713041067123413,
	"rewards/rna_reward_fn/std": 0.3392506539821625,
	"step": 57
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 142.125,
	"completions/mean_terminated_length": 142.125,
	"completions/min_length": 23.0,
	"completions/min_terminated_length": 23.0,
	"entropy": 0.18257632106542587,
	"epoch": 0.6823529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4942987561225891,
	"learning_rate": 7.764705882352941e-07,
	"loss": 0.0,
	"num_tokens": 9001216.0,
	"reward": 0.6629120707511902,
	"reward_std": 0.22726097702980042,
	"rewards/rna_reward_fn/mean": 0.6629120707511902,
	"rewards/rna_reward_fn/std": 0.31348657608032227,
	"step": 58
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 147.03125,
	"completions/mean_terminated_length": 147.03125,
	"completions/min_length": 15.0,
	"completions/min_terminated_length": 15.0,
	"entropy": 0.20158874243497849,
	"epoch": 0.6941176470588235,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5187806487083435,
	"learning_rate": 7.725490196078432e-07,
	"loss": 0.0,
	"num_tokens": 9152800.0,
	"reward": 0.6476730108261108,
	"reward_std": 0.24552714824676514,
	"rewards/rna_reward_fn/mean": 0.6476730108261108,
	"rewards/rna_reward_fn/std": 0.33643367886543274,
	"step": 59
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 399.0,
	"completions/max_terminated_length": 399.0,
	"completions/mean_length": 159.4375,
	"completions/mean_terminated_length": 159.4375,
	"completions/min_length": 28.0,
	"completions/min_terminated_length": 28.0,
	"entropy": 0.18591003119945526,
	"epoch": 0.7058823529411765,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.36044302582740784,
	"learning_rate": 7.686274509803921e-07,
	"loss": 0.0,
	"num_tokens": 9317088.0,
	"reward": 0.6832787394523621,
	"reward_std": 0.22806429862976074,
	"rewards/rna_reward_fn/mean": 0.6832787394523621,
	"rewards/rna_reward_fn/std": 0.32348689436912537,
	"step": 60
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 474.0,
	"completions/max_terminated_length": 474.0,
	"completions/mean_length": 160.96875,
	"completions/mean_terminated_length": 160.96875,
	"completions/min_length": 36.0,
	"completions/min_terminated_length": 36.0,
	"entropy": 0.21002116054296494,
	"epoch": 0.7176470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5378114581108093,
	"learning_rate": 7.647058823529411e-07,
	"loss": -0.0,
	"num_tokens": 9482944.0,
	"reward": 0.6531599760055542,
	"reward_std": 0.22567519545555115,
	"rewards/rna_reward_fn/mean": 0.653160035610199,
	"rewards/rna_reward_fn/std": 0.33769848942756653,
	"step": 61
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 348.0,
	"completions/max_terminated_length": 348.0,
	"completions/mean_length": 116.75,
	"completions/mean_terminated_length": 116.75,
	"completions/min_length": 26.0,
	"completions/min_terminated_length": 26.0,
	"entropy": 0.18150582909584045,
	"epoch": 0.7294117647058823,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.39785146713256836,
	"learning_rate": 7.607843137254901e-07,
	"loss": -0.0,
	"num_tokens": 9603520.0,
	"reward": 0.565564751625061,
	"reward_std": 0.2807776927947998,
	"rewards/rna_reward_fn/mean": 0.565564751625061,
	"rewards/rna_reward_fn/std": 0.38936248421669006,
	"step": 62
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 391.0,
	"completions/max_terminated_length": 391.0,
	"completions/mean_length": 147.78125,
	"completions/mean_terminated_length": 147.78125,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.189855195581913,
	"epoch": 0.7411764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4473720192909241,
	"learning_rate": 7.568627450980392e-07,
	"loss": -0.0,
	"num_tokens": 9755872.0,
	"reward": 0.6822654008865356,
	"reward_std": 0.23419374227523804,
	"rewards/rna_reward_fn/mean": 0.6822654008865356,
	"rewards/rna_reward_fn/std": 0.32637539505958557,
	"step": 63
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 171.28125,
	"completions/mean_terminated_length": 171.28125,
	"completions/min_length": 24.0,
	"completions/min_terminated_length": 24.0,
	"entropy": 0.19365741312503815,
	"epoch": 0.7529411764705882,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5170744061470032,
	"learning_rate": 7.529411764705882e-07,
	"loss": -0.0,
	"num_tokens": 9932288.0,
	"reward": 0.6570923328399658,
	"reward_std": 0.24268731474876404,
	"rewards/rna_reward_fn/mean": 0.6570923328399658,
	"rewards/rna_reward_fn/std": 0.3360862731933594,
	"step": 64
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 400.0,
	"completions/max_terminated_length": 400.0,
	"completions/mean_length": 138.5625,
	"completions/mean_terminated_length": 138.5625,
	"completions/min_length": 50.0,
	"completions/min_terminated_length": 50.0,
	"entropy": 0.15700556337833405,
	"epoch": 0.7647058823529411,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.669632077217102,
	"learning_rate": 7.490196078431373e-07,
	"loss": -0.0,
	"num_tokens": 10075200.0,
	"reward": 0.5884541273117065,
	"reward_std": 0.25077739357948303,
	"rewards/rna_reward_fn/mean": 0.5884541869163513,
	"rewards/rna_reward_fn/std": 0.3707042634487152,
	"step": 65
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 147.875,
	"completions/mean_terminated_length": 147.875,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.1868809014558792,
	"epoch": 0.7764705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.496881365776062,
	"learning_rate": 7.450980392156863e-07,
	"loss": -0.0,
	"num_tokens": 10227648.0,
	"reward": 0.6189287900924683,
	"reward_std": 0.23646032810211182,
	"rewards/rna_reward_fn/mean": 0.6189287900924683,
	"rewards/rna_reward_fn/std": 0.3614950180053711,
	"step": 66
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 127.75,
	"completions/mean_terminated_length": 127.75,
	"completions/min_length": 26.0,
	"completions/min_terminated_length": 26.0,
	"entropy": 0.17434925585985184,
	"epoch": 0.788235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5550652742385864,
	"learning_rate": 7.411764705882352e-07,
	"loss": 0.0,
	"num_tokens": 10359488.0,
	"reward": 0.5918734073638916,
	"reward_std": 0.2727334499359131,
	"rewards/rna_reward_fn/mean": 0.5918734073638916,
	"rewards/rna_reward_fn/std": 0.35672324895858765,
	"step": 67
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 151.96875,
	"completions/mean_terminated_length": 151.96875,
	"completions/min_length": 20.0,
	"completions/min_terminated_length": 20.0,
	"entropy": 0.17505493760108948,
	"epoch": 0.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.3833948075771332,
	"learning_rate": 7.372549019607843e-07,
	"loss": -0.0,
	"num_tokens": 10516128.0,
	"reward": 0.7000205516815186,
	"reward_std": 0.23740704357624054,
	"rewards/rna_reward_fn/mean": 0.7000205516815186,
	"rewards/rna_reward_fn/std": 0.3234153985977173,
	"step": 68
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 141.15625,
	"completions/mean_terminated_length": 141.15625,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.17628953605890274,
	"epoch": 0.8117647058823529,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.3673928678035736,
	"learning_rate": 7.333333333333332e-07,
	"loss": 0.0,
	"num_tokens": 10661696.0,
	"reward": 0.6538941860198975,
	"reward_std": 0.19288064539432526,
	"rewards/rna_reward_fn/mean": 0.6538941860198975,
	"rewards/rna_reward_fn/std": 0.3515564203262329,
	"step": 69
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 399.0,
	"completions/max_terminated_length": 399.0,
	"completions/mean_length": 195.53125,
	"completions/mean_terminated_length": 195.53125,
	"completions/min_length": 17.0,
	"completions/min_terminated_length": 17.0,
	"entropy": 0.18974752724170685,
	"epoch": 0.8235294117647058,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.719358503818512,
	"learning_rate": 7.294117647058823e-07,
	"loss": -0.0,
	"num_tokens": 10862944.0,
	"reward": 0.5886421203613281,
	"reward_std": 0.23114809393882751,
	"rewards/rna_reward_fn/mean": 0.5886421203613281,
	"rewards/rna_reward_fn/std": 0.36729925870895386,
	"step": 70
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 156.46875,
	"completions/mean_terminated_length": 156.46875,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.17211396992206573,
	"epoch": 0.8352941176470589,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4566245377063751,
	"learning_rate": 7.254901960784313e-07,
	"loss": 0.0,
	"num_tokens": 11024192.0,
	"reward": 0.6206304430961609,
	"reward_std": 0.20096182823181152,
	"rewards/rna_reward_fn/mean": 0.6206304430961609,
	"rewards/rna_reward_fn/std": 0.3349648714065552,
	"step": 71
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 124.84375,
	"completions/mean_terminated_length": 124.84375,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.16766826063394547,
	"epoch": 0.8470588235294118,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4164656698703766,
	"learning_rate": 7.215686274509804e-07,
	"loss": -0.0,
	"num_tokens": 11153056.0,
	"reward": 0.6351762413978577,
	"reward_std": 0.2213377058506012,
	"rewards/rna_reward_fn/mean": 0.6351762413978577,
	"rewards/rna_reward_fn/std": 0.3493310809135437,
	"step": 72
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 412.0,
	"completions/max_terminated_length": 412.0,
	"completions/mean_length": 129.65625,
	"completions/mean_terminated_length": 129.65625,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.16023673117160797,
	"epoch": 0.8588235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.41499394178390503,
	"learning_rate": 7.176470588235294e-07,
	"loss": 0.0,
	"num_tokens": 11286848.0,
	"reward": 0.6752070784568787,
	"reward_std": 0.24617840349674225,
	"rewards/rna_reward_fn/mean": 0.6752070784568787,
	"rewards/rna_reward_fn/std": 0.34732139110565186,
	"step": 73
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 164.9375,
	"completions/mean_terminated_length": 164.9375,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.18363939225673676,
	"epoch": 0.8705882352941177,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.45577237010002136,
	"learning_rate": 7.137254901960785e-07,
	"loss": -0.0,
	"num_tokens": 11456768.0,
	"reward": 0.5772933959960938,
	"reward_std": 0.23847423493862152,
	"rewards/rna_reward_fn/mean": 0.5772933959960938,
	"rewards/rna_reward_fn/std": 0.3823261260986328,
	"step": 74
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 492.0,
	"completions/max_terminated_length": 492.0,
	"completions/mean_length": 188.28125,
	"completions/mean_terminated_length": 188.28125,
	"completions/min_length": 55.0,
	"completions/min_terminated_length": 55.0,
	"entropy": 0.1838960349559784,
	"epoch": 0.8823529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5237012505531311,
	"learning_rate": 7.098039215686274e-07,
	"loss": 0.0,
	"num_tokens": 11650592.0,
	"reward": 0.6181286573410034,
	"reward_std": 0.2555590569972992,
	"rewards/rna_reward_fn/mean": 0.6181286573410034,
	"rewards/rna_reward_fn/std": 0.37019652128219604,
	"step": 75
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 182.1875,
	"completions/mean_terminated_length": 182.1875,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.1790659874677658,
	"epoch": 0.8941176470588236,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4818723499774933,
	"learning_rate": 7.058823529411765e-07,
	"loss": -0.0,
	"num_tokens": 11838176.0,
	"reward": 0.578412652015686,
	"reward_std": 0.22860457003116608,
	"rewards/rna_reward_fn/mean": 0.578412652015686,
	"rewards/rna_reward_fn/std": 0.35265785455703735,
	"step": 76
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 200.21875,
	"completions/mean_terminated_length": 200.21875,
	"completions/min_length": 23.0,
	"completions/min_terminated_length": 23.0,
	"entropy": 0.18565233796834946,
	"epoch": 0.9058823529411765,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7948216795921326,
	"learning_rate": 7.019607843137254e-07,
	"loss": 0.0,
	"num_tokens": 12044224.0,
	"reward": 0.6187993288040161,
	"reward_std": 0.2622474431991577,
	"rewards/rna_reward_fn/mean": 0.6187993288040161,
	"rewards/rna_reward_fn/std": 0.326750248670578,
	"step": 77
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 145.8125,
	"completions/mean_terminated_length": 145.8125,
	"completions/min_length": 19.0,
	"completions/min_terminated_length": 19.0,
	"entropy": 0.17154797911643982,
	"epoch": 0.9176470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.47565799951553345,
	"learning_rate": 6.980392156862744e-07,
	"loss": -0.0,
	"num_tokens": 12194560.0,
	"reward": 0.5971746444702148,
	"reward_std": 0.18512360751628876,
	"rewards/rna_reward_fn/mean": 0.5971747040748596,
	"rewards/rna_reward_fn/std": 0.3710518777370453,
	"step": 78
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 373.0,
	"completions/max_terminated_length": 373.0,
	"completions/mean_length": 128.71875,
	"completions/mean_terminated_length": 128.71875,
	"completions/min_length": 34.0,
	"completions/min_terminated_length": 34.0,
	"entropy": 0.15196984261274338,
	"epoch": 0.9294117647058824,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4943343698978424,
	"learning_rate": 6.941176470588235e-07,
	"loss": -0.0,
	"num_tokens": 12327392.0,
	"reward": 0.6471496820449829,
	"reward_std": 0.22329822182655334,
	"rewards/rna_reward_fn/mean": 0.6471496820449829,
	"rewards/rna_reward_fn/std": 0.33536407351493835,
	"step": 79
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 400.0,
	"completions/max_terminated_length": 400.0,
	"completions/mean_length": 137.84375,
	"completions/mean_terminated_length": 137.84375,
	"completions/min_length": 28.0,
	"completions/min_terminated_length": 28.0,
	"entropy": 0.16948848217725754,
	"epoch": 0.9411764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4759957492351532,
	"learning_rate": 6.901960784313725e-07,
	"loss": -0.0,
	"num_tokens": 12469568.0,
	"reward": 0.659608006477356,
	"reward_std": 0.18602336943149567,
	"rewards/rna_reward_fn/mean": 0.659608006477356,
	"rewards/rna_reward_fn/std": 0.3731914460659027,
	"step": 80
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 146.75,
	"completions/mean_terminated_length": 146.75,
	"completions/min_length": 19.0,
	"completions/min_terminated_length": 19.0,
	"entropy": 0.18501683324575424,
	"epoch": 0.9529411764705882,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.43785735964775085,
	"learning_rate": 6.862745098039216e-07,
	"loss": 0.0,
	"num_tokens": 12620864.0,
	"reward": 0.620478630065918,
	"reward_std": 0.22393935918807983,
	"rewards/rna_reward_fn/mean": 0.620478630065918,
	"rewards/rna_reward_fn/std": 0.35981276631355286,
	"step": 81
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 150.1875,
	"completions/mean_terminated_length": 150.1875,
	"completions/min_length": 26.0,
	"completions/min_terminated_length": 26.0,
	"entropy": 0.1829531416296959,
	"epoch": 0.9647058823529412,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4392038583755493,
	"learning_rate": 6.823529411764706e-07,
	"loss": 0.0,
	"num_tokens": 12775680.0,
	"reward": 0.6712214350700378,
	"reward_std": 0.2174052894115448,
	"rewards/rna_reward_fn/mean": 0.6712214946746826,
	"rewards/rna_reward_fn/std": 0.3370954990386963,
	"step": 82
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 391.0,
	"completions/max_terminated_length": 391.0,
	"completions/mean_length": 141.8125,
	"completions/mean_terminated_length": 141.8125,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.1686822921037674,
	"epoch": 0.9764705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4484212398529053,
	"learning_rate": 6.784313725490196e-07,
	"loss": -0.0,
	"num_tokens": 12921920.0,
	"reward": 0.6464422345161438,
	"reward_std": 0.2250806838274002,
	"rewards/rna_reward_fn/mean": 0.6464422345161438,
	"rewards/rna_reward_fn/std": 0.3622319996356964,
	"step": 83
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 166.65625,
	"completions/mean_terminated_length": 166.65625,
	"completions/min_length": 28.0,
	"completions/min_terminated_length": 28.0,
	"entropy": 0.17645781487226486,
	"epoch": 0.9882352941176471,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7668079137802124,
	"learning_rate": 6.745098039215686e-07,
	"loss": 0.0,
	"num_tokens": 13093600.0,
	"reward": 0.6832870244979858,
	"reward_std": 0.25750601291656494,
	"rewards/rna_reward_fn/mean": 0.6832869648933411,
	"rewards/rna_reward_fn/std": 0.3430787920951843,
	"step": 84
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 167.96875,
	"completions/mean_terminated_length": 167.96875,
	"completions/min_length": 16.0,
	"completions/min_terminated_length": 16.0,
	"entropy": 0.17668870836496353,
	"epoch": 1.0,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.43097105622291565,
	"learning_rate": 6.705882352941176e-07,
	"loss": 0.0,
	"num_tokens": 13266624.0,
	"reward": 0.5539568662643433,
	"reward_std": 0.22693298757076263,
	"rewards/rna_reward_fn/mean": 0.5539568066596985,
	"rewards/rna_reward_fn/std": 0.38347697257995605,
	"step": 85
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 182.3125,
	"completions/mean_terminated_length": 182.3125,
	"completions/min_length": 24.0,
	"completions/min_terminated_length": 24.0,
	"entropy": 0.1827656850218773,
	"epoch": 1.011764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5608375668525696,
	"learning_rate": 6.666666666666666e-07,
	"loss": 0.0,
	"num_tokens": 13454336.0,
	"reward": 0.7320628762245178,
	"reward_std": 0.22256582975387573,
	"rewards/rna_reward_fn/mean": 0.7320628762245178,
	"rewards/rna_reward_fn/std": 0.30846187472343445,
	"step": 86
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 140.625,
	"completions/mean_terminated_length": 140.625,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.18483393639326096,
	"epoch": 1.0235294117647058,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4667685627937317,
	"learning_rate": 6.627450980392156e-07,
	"loss": 0.0,
	"num_tokens": 13599360.0,
	"reward": 0.6894385814666748,
	"reward_std": 0.20523157715797424,
	"rewards/rna_reward_fn/mean": 0.6894385814666748,
	"rewards/rna_reward_fn/std": 0.3155847191810608,
	"step": 87
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 150.46875,
	"completions/mean_terminated_length": 150.46875,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.16182925552129745,
	"epoch": 1.035294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6056375503540039,
	"learning_rate": 6.588235294117647e-07,
	"loss": -0.0,
	"num_tokens": 13754464.0,
	"reward": 0.6177388429641724,
	"reward_std": 0.24611341953277588,
	"rewards/rna_reward_fn/mean": 0.6177388429641724,
	"rewards/rna_reward_fn/std": 0.3494950830936432,
	"step": 88
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 144.09375,
	"completions/mean_terminated_length": 144.09375,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.17024414986371994,
	"epoch": 1.0470588235294118,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4357620179653168,
	"learning_rate": 6.549019607843137e-07,
	"loss": -0.0,
	"num_tokens": 13903040.0,
	"reward": 0.611262857913971,
	"reward_std": 0.19428220391273499,
	"rewards/rna_reward_fn/mean": 0.611262857913971,
	"rewards/rna_reward_fn/std": 0.3793390393257141,
	"step": 89
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 132.5625,
	"completions/mean_terminated_length": 132.5625,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.16757714748382568,
	"epoch": 1.0588235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.440759539604187,
	"learning_rate": 6.509803921568627e-07,
	"loss": -0.0,
	"num_tokens": 14039808.0,
	"reward": 0.6882448196411133,
	"reward_std": 0.19556942582130432,
	"rewards/rna_reward_fn/mean": 0.6882448196411133,
	"rewards/rna_reward_fn/std": 0.32508718967437744,
	"step": 90
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 143.78125,
	"completions/mean_terminated_length": 143.78125,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.1645500287413597,
	"epoch": 1.0705882352941176,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5613058805465698,
	"learning_rate": 6.470588235294117e-07,
	"loss": 0.0,
	"num_tokens": 14188064.0,
	"reward": 0.6789584159851074,
	"reward_std": 0.19199398159980774,
	"rewards/rna_reward_fn/mean": 0.6789584159851074,
	"rewards/rna_reward_fn/std": 0.3482169210910797,
	"step": 91
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 399.0,
	"completions/max_terminated_length": 399.0,
	"completions/mean_length": 118.34375,
	"completions/mean_terminated_length": 118.34375,
	"completions/min_length": 31.0,
	"completions/min_terminated_length": 31.0,
	"entropy": 0.14176590740680695,
	"epoch": 1.0823529411764705,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4092370867729187,
	"learning_rate": 6.431372549019608e-07,
	"loss": -0.0,
	"num_tokens": 14310272.0,
	"reward": 0.650740921497345,
	"reward_std": 0.18103614449501038,
	"rewards/rna_reward_fn/mean": 0.650740921497345,
	"rewards/rna_reward_fn/std": 0.32734215259552,
	"step": 92
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 154.3125,
	"completions/mean_terminated_length": 154.3125,
	"completions/min_length": 28.0,
	"completions/min_terminated_length": 28.0,
	"entropy": 0.176346056163311,
	"epoch": 1.0941176470588236,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4459090232849121,
	"learning_rate": 6.392156862745097e-07,
	"loss": 0.0,
	"num_tokens": 14469312.0,
	"reward": 0.6732466816902161,
	"reward_std": 0.22345304489135742,
	"rewards/rna_reward_fn/mean": 0.6732466816902161,
	"rewards/rna_reward_fn/std": 0.3369784951210022,
	"step": 93
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 145.3125,
	"completions/mean_terminated_length": 145.3125,
	"completions/min_length": 34.0,
	"completions/min_terminated_length": 34.0,
	"entropy": 0.1685405969619751,
	"epoch": 1.1058823529411765,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5043669939041138,
	"learning_rate": 6.352941176470588e-07,
	"loss": -0.0,
	"num_tokens": 14619136.0,
	"reward": 0.677271842956543,
	"reward_std": 0.20296773314476013,
	"rewards/rna_reward_fn/mean": 0.677271842956543,
	"rewards/rna_reward_fn/std": 0.320669025182724,
	"step": 94
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 170.1875,
	"completions/mean_terminated_length": 170.1875,
	"completions/min_length": 34.0,
	"completions/min_terminated_length": 34.0,
	"entropy": 0.18431222438812256,
	"epoch": 1.1176470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6736860275268555,
	"learning_rate": 6.313725490196078e-07,
	"loss": -0.0,
	"num_tokens": 14794432.0,
	"reward": 0.6684234738349915,
	"reward_std": 0.259125292301178,
	"rewards/rna_reward_fn/mean": 0.6684235334396362,
	"rewards/rna_reward_fn/std": 0.34210121631622314,
	"step": 95
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 157.6875,
	"completions/mean_terminated_length": 157.6875,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.16836901009082794,
	"epoch": 1.1294117647058823,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4569699764251709,
	"learning_rate": 6.274509803921569e-07,
	"loss": -0.0,
	"num_tokens": 14956928.0,
	"reward": 0.68538498878479,
	"reward_std": 0.1874302327632904,
	"rewards/rna_reward_fn/mean": 0.68538498878479,
	"rewards/rna_reward_fn/std": 0.295845091342926,
	"step": 96
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 393.0,
	"completions/max_terminated_length": 393.0,
	"completions/mean_length": 140.21875,
	"completions/mean_terminated_length": 140.21875,
	"completions/min_length": 15.0,
	"completions/min_terminated_length": 15.0,
	"entropy": 0.158738911151886,
	"epoch": 1.1411764705882352,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4725809395313263,
	"learning_rate": 6.235294117647059e-07,
	"loss": 0.0,
	"num_tokens": 15101536.0,
	"reward": 0.6654532551765442,
	"reward_std": 0.18864062428474426,
	"rewards/rna_reward_fn/mean": 0.6654532551765442,
	"rewards/rna_reward_fn/std": 0.3371845781803131,
	"step": 97
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 162.8125,
	"completions/mean_terminated_length": 162.8125,
	"completions/min_length": 42.0,
	"completions/min_terminated_length": 42.0,
	"entropy": 0.17738928645849228,
	"epoch": 1.1529411764705881,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5798309445381165,
	"learning_rate": 6.196078431372548e-07,
	"loss": -0.0,
	"num_tokens": 15269280.0,
	"reward": 0.7147358655929565,
	"reward_std": 0.21203583478927612,
	"rewards/rna_reward_fn/mean": 0.7147358655929565,
	"rewards/rna_reward_fn/std": 0.33255505561828613,
	"step": 98
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 168.03125,
	"completions/mean_terminated_length": 168.03125,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.17116892337799072,
	"epoch": 1.1647058823529413,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5520632863044739,
	"learning_rate": 6.156862745098039e-07,
	"loss": -0.0,
	"num_tokens": 15442368.0,
	"reward": 0.6365219950675964,
	"reward_std": 0.20218491554260254,
	"rewards/rna_reward_fn/mean": 0.6365219950675964,
	"rewards/rna_reward_fn/std": 0.35175827145576477,
	"step": 99
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 397.0,
	"completions/max_terminated_length": 397.0,
	"completions/mean_length": 138.0,
	"completions/mean_terminated_length": 138.0,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.17306677252054214,
	"epoch": 1.1764705882352942,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4389256238937378,
	"learning_rate": 6.117647058823529e-07,
	"loss": 0.0,
	"num_tokens": 15584704.0,
	"reward": 0.7388399839401245,
	"reward_std": 0.16607630252838135,
	"rewards/rna_reward_fn/mean": 0.7388399839401245,
	"rewards/rna_reward_fn/std": 0.2576732635498047,
	"step": 100
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 392.0,
	"completions/max_terminated_length": 392.0,
	"completions/mean_length": 137.40625,
	"completions/mean_terminated_length": 137.40625,
	"completions/min_length": 23.0,
	"completions/min_terminated_length": 23.0,
	"entropy": 0.15397901087999344,
	"epoch": 1.188235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5594757795333862,
	"learning_rate": 6.078431372549019e-07,
	"loss": -0.0,
	"num_tokens": 15726432.0,
	"reward": 0.7157045602798462,
	"reward_std": 0.22128766775131226,
	"rewards/rna_reward_fn/mean": 0.7157045602798462,
	"rewards/rna_reward_fn/std": 0.2969537079334259,
	"step": 101
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 400.0,
	"completions/max_terminated_length": 400.0,
	"completions/mean_length": 127.78125,
	"completions/mean_terminated_length": 127.78125,
	"completions/min_length": 28.0,
	"completions/min_terminated_length": 28.0,
	"entropy": 0.17225481569766998,
	"epoch": 1.2,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.40622541308403015,
	"learning_rate": 6.039215686274509e-07,
	"loss": -0.0,
	"num_tokens": 15858304.0,
	"reward": 0.7043038010597229,
	"reward_std": 0.22727924585342407,
	"rewards/rna_reward_fn/mean": 0.7043038606643677,
	"rewards/rna_reward_fn/std": 0.33978909254074097,
	"step": 102
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 167.625,
	"completions/mean_terminated_length": 167.625,
	"completions/min_length": 23.0,
	"completions/min_terminated_length": 23.0,
	"entropy": 0.17464321851730347,
	"epoch": 1.2117647058823529,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4603181779384613,
	"learning_rate": 6e-07,
	"loss": -0.0,
	"num_tokens": 16030976.0,
	"reward": 0.61054527759552,
	"reward_std": 0.22179073095321655,
	"rewards/rna_reward_fn/mean": 0.61054527759552,
	"rewards/rna_reward_fn/std": 0.37210676074028015,
	"step": 103
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 156.8125,
	"completions/mean_terminated_length": 156.8125,
	"completions/min_length": 24.0,
	"completions/min_terminated_length": 24.0,
	"entropy": 0.1658085659146309,
	"epoch": 1.223529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4843849539756775,
	"learning_rate": 5.96078431372549e-07,
	"loss": -0.0,
	"num_tokens": 16192576.0,
	"reward": 0.6978532075881958,
	"reward_std": 0.1981123685836792,
	"rewards/rna_reward_fn/mean": 0.6978532671928406,
	"rewards/rna_reward_fn/std": 0.3141247630119324,
	"step": 104
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 181.1875,
	"completions/mean_terminated_length": 181.1875,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.16212371736764908,
	"epoch": 1.2352941176470589,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5290284752845764,
	"learning_rate": 5.921568627450981e-07,
	"loss": 0.0,
	"num_tokens": 16379136.0,
	"reward": 0.6463083028793335,
	"reward_std": 0.1896321177482605,
	"rewards/rna_reward_fn/mean": 0.6463083028793335,
	"rewards/rna_reward_fn/std": 0.36457034945487976,
	"step": 105
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 282.0,
	"completions/max_terminated_length": 282.0,
	"completions/mean_length": 124.3125,
	"completions/mean_terminated_length": 124.3125,
	"completions/min_length": 24.0,
	"completions/min_terminated_length": 24.0,
	"entropy": 0.15162574499845505,
	"epoch": 1.2470588235294118,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.47445422410964966,
	"learning_rate": 5.88235294117647e-07,
	"loss": 0.0,
	"num_tokens": 16507456.0,
	"reward": 0.672465980052948,
	"reward_std": 0.20273976027965546,
	"rewards/rna_reward_fn/mean": 0.6724659204483032,
	"rewards/rna_reward_fn/std": 0.3352026343345642,
	"step": 106
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 154.25,
	"completions/mean_terminated_length": 154.25,
	"completions/min_length": 32.0,
	"completions/min_terminated_length": 32.0,
	"entropy": 0.1651393622159958,
	"epoch": 1.2588235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.48081472516059875,
	"learning_rate": 5.843137254901961e-07,
	"loss": -0.0,
	"num_tokens": 16666432.0,
	"reward": 0.6745295524597168,
	"reward_std": 0.21466964483261108,
	"rewards/rna_reward_fn/mean": 0.6745295524597168,
	"rewards/rna_reward_fn/std": 0.3604423701763153,
	"step": 107
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 176.34375,
	"completions/mean_terminated_length": 176.34375,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.16943742334842682,
	"epoch": 1.2705882352941176,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4931647777557373,
	"learning_rate": 5.803921568627451e-07,
	"loss": 0.0,
	"num_tokens": 16848032.0,
	"reward": 0.6875256896018982,
	"reward_std": 0.2435401976108551,
	"rewards/rna_reward_fn/mean": 0.6875256896018982,
	"rewards/rna_reward_fn/std": 0.3279384672641754,
	"step": 108
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 158.09375,
	"completions/mean_terminated_length": 158.09375,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.17465446144342422,
	"epoch": 1.2823529411764705,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5001822113990784,
	"learning_rate": 5.76470588235294e-07,
	"loss": 0.0,
	"num_tokens": 17010944.0,
	"reward": 0.6029446125030518,
	"reward_std": 0.1757221221923828,
	"rewards/rna_reward_fn/mean": 0.6029446125030518,
	"rewards/rna_reward_fn/std": 0.35652756690979004,
	"step": 109
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 426.0,
	"completions/max_terminated_length": 426.0,
	"completions/mean_length": 167.40625,
	"completions/mean_terminated_length": 167.40625,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.16541431099176407,
	"epoch": 1.2941176470588236,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4689631760120392,
	"learning_rate": 5.725490196078431e-07,
	"loss": -0.0,
	"num_tokens": 17183392.0,
	"reward": 0.6704152226448059,
	"reward_std": 0.20997245609760284,
	"rewards/rna_reward_fn/mean": 0.6704152226448059,
	"rewards/rna_reward_fn/std": 0.32471874356269836,
	"step": 110
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 398.0,
	"completions/max_terminated_length": 398.0,
	"completions/mean_length": 141.71875,
	"completions/mean_terminated_length": 141.71875,
	"completions/min_length": 31.0,
	"completions/min_terminated_length": 31.0,
	"entropy": 0.1647869274020195,
	"epoch": 1.3058823529411765,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5760033130645752,
	"learning_rate": 5.686274509803921e-07,
	"loss": -0.0,
	"num_tokens": 17329536.0,
	"reward": 0.6938682198524475,
	"reward_std": 0.20044496655464172,
	"rewards/rna_reward_fn/mean": 0.6938682198524475,
	"rewards/rna_reward_fn/std": 0.32881274819374084,
	"step": 111
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 115.96875,
	"completions/mean_terminated_length": 115.96875,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.1390109360218048,
	"epoch": 1.3176470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5902699828147888,
	"learning_rate": 5.647058823529412e-07,
	"loss": 0.0,
	"num_tokens": 17449312.0,
	"reward": 0.651271402835846,
	"reward_std": 0.17913030087947845,
	"rewards/rna_reward_fn/mean": 0.651271402835846,
	"rewards/rna_reward_fn/std": 0.3490009009838104,
	"step": 112
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 472.0,
	"completions/max_terminated_length": 472.0,
	"completions/mean_length": 179.8125,
	"completions/mean_terminated_length": 179.8125,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.16215970367193222,
	"epoch": 1.3294117647058823,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6261849403381348,
	"learning_rate": 5.607843137254902e-07,
	"loss": -0.0,
	"num_tokens": 17634464.0,
	"reward": 0.6400759220123291,
	"reward_std": 0.2095731794834137,
	"rewards/rna_reward_fn/mean": 0.6400759220123291,
	"rewards/rna_reward_fn/std": 0.34743088483810425,
	"step": 113
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 139.59375,
	"completions/mean_terminated_length": 139.59375,
	"completions/min_length": 26.0,
	"completions/min_terminated_length": 26.0,
	"entropy": 0.17950539290905,
	"epoch": 1.3411764705882354,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4431358277797699,
	"learning_rate": 5.568627450980392e-07,
	"loss": 0.0,
	"num_tokens": 17778432.0,
	"reward": 0.7148804068565369,
	"reward_std": 0.19681406021118164,
	"rewards/rna_reward_fn/mean": 0.7148803472518921,
	"rewards/rna_reward_fn/std": 0.2995694577693939,
	"step": 114
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 482.0,
	"completions/max_terminated_length": 482.0,
	"completions/mean_length": 167.6875,
	"completions/mean_terminated_length": 167.6875,
	"completions/min_length": 47.0,
	"completions/min_terminated_length": 47.0,
	"entropy": 0.16394728422164917,
	"epoch": 1.3529411764705883,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4245275557041168,
	"learning_rate": 5.529411764705882e-07,
	"loss": -0.0,
	"num_tokens": 17951168.0,
	"reward": 0.6865168213844299,
	"reward_std": 0.21481367945671082,
	"rewards/rna_reward_fn/mean": 0.6865168213844299,
	"rewards/rna_reward_fn/std": 0.3217703402042389,
	"step": 115
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 146.6875,
	"completions/mean_terminated_length": 146.6875,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.16379400342702866,
	"epoch": 1.3647058823529412,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7668678760528564,
	"learning_rate": 5.490196078431373e-07,
	"loss": -0.0,
	"num_tokens": 18102400.0,
	"reward": 0.7100426554679871,
	"reward_std": 0.20684288442134857,
	"rewards/rna_reward_fn/mean": 0.7100426554679871,
	"rewards/rna_reward_fn/std": 0.32808709144592285,
	"step": 116
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 147.40625,
	"completions/mean_terminated_length": 147.40625,
	"completions/min_length": 43.0,
	"completions/min_terminated_length": 43.0,
	"entropy": 0.16369594633579254,
	"epoch": 1.3764705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4491204619407654,
	"learning_rate": 5.450980392156862e-07,
	"loss": -0.0,
	"num_tokens": 18254368.0,
	"reward": 0.6345921754837036,
	"reward_std": 0.17989099025726318,
	"rewards/rna_reward_fn/mean": 0.6345921754837036,
	"rewards/rna_reward_fn/std": 0.3739507794380188,
	"step": 117
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 128.96875,
	"completions/mean_terminated_length": 128.96875,
	"completions/min_length": 36.0,
	"completions/min_terminated_length": 36.0,
	"entropy": 0.16341928392648697,
	"epoch": 1.388235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.40218448638916016,
	"learning_rate": 5.411764705882353e-07,
	"loss": 0.0,
	"num_tokens": 18387456.0,
	"reward": 0.6973093748092651,
	"reward_std": 0.19106432795524597,
	"rewards/rna_reward_fn/mean": 0.6973093748092651,
	"rewards/rna_reward_fn/std": 0.328565388917923,
	"step": 118
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 185.3125,
	"completions/mean_terminated_length": 185.3125,
	"completions/min_length": 55.0,
	"completions/min_terminated_length": 55.0,
	"entropy": 0.15643662959337234,
	"epoch": 1.4,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4641011953353882,
	"learning_rate": 5.372549019607843e-07,
	"loss": -0.0,
	"num_tokens": 18578240.0,
	"reward": 0.6982426643371582,
	"reward_std": 0.17999790608882904,
	"rewards/rna_reward_fn/mean": 0.6982426643371582,
	"rewards/rna_reward_fn/std": 0.3187488615512848,
	"step": 119
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 500.0,
	"completions/max_terminated_length": 500.0,
	"completions/mean_length": 151.125,
	"completions/mean_terminated_length": 151.125,
	"completions/min_length": 51.0,
	"completions/min_terminated_length": 51.0,
	"entropy": 0.16167542338371277,
	"epoch": 1.4117647058823528,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4710671305656433,
	"learning_rate": 5.333333333333333e-07,
	"loss": -0.0,
	"num_tokens": 18734016.0,
	"reward": 0.765220046043396,
	"reward_std": 0.16310608386993408,
	"rewards/rna_reward_fn/mean": 0.765220046043396,
	"rewards/rna_reward_fn/std": 0.30073776841163635,
	"step": 120
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 200.53125,
	"completions/mean_terminated_length": 200.53125,
	"completions/min_length": 42.0,
	"completions/min_terminated_length": 42.0,
	"entropy": 0.17333289235830307,
	"epoch": 1.423529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5605267882347107,
	"learning_rate": 5.294117647058823e-07,
	"loss": -0.0,
	"num_tokens": 18940384.0,
	"reward": 0.6207563877105713,
	"reward_std": 0.2605891227722168,
	"rewards/rna_reward_fn/mean": 0.6207563877105713,
	"rewards/rna_reward_fn/std": 0.35733622312545776,
	"step": 121
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 378.0,
	"completions/max_terminated_length": 378.0,
	"completions/mean_length": 126.90625,
	"completions/mean_terminated_length": 126.90625,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.16177111864089966,
	"epoch": 1.4352941176470588,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5492433905601501,
	"learning_rate": 5.254901960784313e-07,
	"loss": 0.0,
	"num_tokens": 19071360.0,
	"reward": 0.6156597137451172,
	"reward_std": 0.2084151953458786,
	"rewards/rna_reward_fn/mean": 0.6156597137451172,
	"rewards/rna_reward_fn/std": 0.3588009178638458,
	"step": 122
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 389.0,
	"completions/max_terminated_length": 389.0,
	"completions/mean_length": 126.15625,
	"completions/mean_terminated_length": 126.15625,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.1655115783214569,
	"epoch": 1.4470588235294117,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5015555024147034,
	"learning_rate": 5.215686274509804e-07,
	"loss": 0.0,
	"num_tokens": 19201568.0,
	"reward": 0.6790971755981445,
	"reward_std": 0.20820938050746918,
	"rewards/rna_reward_fn/mean": 0.6790972352027893,
	"rewards/rna_reward_fn/std": 0.33763545751571655,
	"step": 123
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 153.75,
	"completions/mean_terminated_length": 153.75,
	"completions/min_length": 19.0,
	"completions/min_terminated_length": 19.0,
	"entropy": 0.1595897227525711,
	"epoch": 1.4588235294117646,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5314822793006897,
	"learning_rate": 5.176470588235294e-07,
	"loss": 0.0,
	"num_tokens": 19360032.0,
	"reward": 0.6510605812072754,
	"reward_std": 0.18497204780578613,
	"rewards/rna_reward_fn/mean": 0.6510605812072754,
	"rewards/rna_reward_fn/std": 0.3650972247123718,
	"step": 124
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 134.5625,
	"completions/mean_terminated_length": 134.5625,
	"completions/min_length": 34.0,
	"completions/min_terminated_length": 34.0,
	"entropy": 0.1490706205368042,
	"epoch": 1.4705882352941178,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5578471422195435,
	"learning_rate": 5.137254901960784e-07,
	"loss": -0.0,
	"num_tokens": 19498848.0,
	"reward": 0.6481872797012329,
	"reward_std": 0.19116738438606262,
	"rewards/rna_reward_fn/mean": 0.6481872797012329,
	"rewards/rna_reward_fn/std": 0.32832634449005127,
	"step": 125
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 186.0625,
	"completions/mean_terminated_length": 186.0625,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.16315071284770966,
	"epoch": 1.4823529411764707,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.47001388669013977,
	"learning_rate": 5.098039215686274e-07,
	"loss": 0.0,
	"num_tokens": 19690400.0,
	"reward": 0.6869475245475769,
	"reward_std": 0.21966272592544556,
	"rewards/rna_reward_fn/mean": 0.6869475245475769,
	"rewards/rna_reward_fn/std": 0.3061429262161255,
	"step": 126
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 159.25,
	"completions/mean_terminated_length": 159.25,
	"completions/min_length": 36.0,
	"completions/min_terminated_length": 36.0,
	"entropy": 0.1544899046421051,
	"epoch": 1.4941176470588236,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7163305878639221,
	"learning_rate": 5.058823529411765e-07,
	"loss": 0.0,
	"num_tokens": 19854496.0,
	"reward": 0.7104751467704773,
	"reward_std": 0.17693877220153809,
	"rewards/rna_reward_fn/mean": 0.7104751467704773,
	"rewards/rna_reward_fn/std": 0.30990538001060486,
	"step": 127
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 134.6875,
	"completions/mean_terminated_length": 134.6875,
	"completions/min_length": 19.0,
	"completions/min_terminated_length": 19.0,
	"entropy": 0.16278471052646637,
	"epoch": 1.5058823529411764,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7567697167396545,
	"learning_rate": 5.019607843137255e-07,
	"loss": -0.0,
	"num_tokens": 19993440.0,
	"reward": 0.6815826296806335,
	"reward_std": 0.20137576758861542,
	"rewards/rna_reward_fn/mean": 0.6815826296806335,
	"rewards/rna_reward_fn/std": 0.32526591420173645,
	"step": 128
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 142.5625,
	"completions/mean_terminated_length": 142.5625,
	"completions/min_length": 23.0,
	"completions/min_terminated_length": 23.0,
	"entropy": 0.16126833856105804,
	"epoch": 1.5176470588235293,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5958517789840698,
	"learning_rate": 4.980392156862744e-07,
	"loss": 0.0,
	"num_tokens": 20140448.0,
	"reward": 0.6496865153312683,
	"reward_std": 0.23397710919380188,
	"rewards/rna_reward_fn/mean": 0.6496865153312683,
	"rewards/rna_reward_fn/std": 0.3660079836845398,
	"step": 129
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 418.0,
	"completions/max_terminated_length": 418.0,
	"completions/mean_length": 178.3125,
	"completions/mean_terminated_length": 178.3125,
	"completions/min_length": 46.0,
	"completions/min_terminated_length": 46.0,
	"entropy": 0.16705547273159027,
	"epoch": 1.5294117647058822,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5045768618583679,
	"learning_rate": 4.941176470588235e-07,
	"loss": 0.0,
	"num_tokens": 20324064.0,
	"reward": 0.6084290146827698,
	"reward_std": 0.22301070392131805,
	"rewards/rna_reward_fn/mean": 0.608428955078125,
	"rewards/rna_reward_fn/std": 0.37412387132644653,
	"step": 130
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 178.21875,
	"completions/mean_terminated_length": 178.21875,
	"completions/min_length": 38.0,
	"completions/min_terminated_length": 38.0,
	"entropy": 0.16225259751081467,
	"epoch": 1.5411764705882351,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4790975749492645,
	"learning_rate": 4.901960784313725e-07,
	"loss": -0.0,
	"num_tokens": 20507584.0,
	"reward": 0.6834284067153931,
	"reward_std": 0.16327084600925446,
	"rewards/rna_reward_fn/mean": 0.6834284067153931,
	"rewards/rna_reward_fn/std": 0.3331601321697235,
	"step": 131
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 124.46875,
	"completions/mean_terminated_length": 124.46875,
	"completions/min_length": 17.0,
	"completions/min_terminated_length": 17.0,
	"entropy": 0.14231518656015396,
	"epoch": 1.5529411764705883,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.45782116055488586,
	"learning_rate": 4.862745098039216e-07,
	"loss": -0.0,
	"num_tokens": 20636064.0,
	"reward": 0.6696175336837769,
	"reward_std": 0.1951877474784851,
	"rewards/rna_reward_fn/mean": 0.6696175336837769,
	"rewards/rna_reward_fn/std": 0.3469404876232147,
	"step": 132
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 153.09375,
	"completions/mean_terminated_length": 153.09375,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.14148423075675964,
	"epoch": 1.5647058823529412,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6516547203063965,
	"learning_rate": 4.823529411764705e-07,
	"loss": -0.0,
	"num_tokens": 20793856.0,
	"reward": 0.6711336374282837,
	"reward_std": 0.2223963439464569,
	"rewards/rna_reward_fn/mean": 0.6711336374282837,
	"rewards/rna_reward_fn/std": 0.3334668278694153,
	"step": 133
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 144.34375,
	"completions/mean_terminated_length": 144.34375,
	"completions/min_length": 20.0,
	"completions/min_terminated_length": 20.0,
	"entropy": 0.1529795005917549,
	"epoch": 1.576470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5148042440414429,
	"learning_rate": 4.784313725490196e-07,
	"loss": 0.0,
	"num_tokens": 20942688.0,
	"reward": 0.759110152721405,
	"reward_std": 0.16160593926906586,
	"rewards/rna_reward_fn/mean": 0.7591102123260498,
	"rewards/rna_reward_fn/std": 0.2931617796421051,
	"step": 134
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 339.0,
	"completions/max_terminated_length": 339.0,
	"completions/mean_length": 108.34375,
	"completions/mean_terminated_length": 108.34375,
	"completions/min_length": 28.0,
	"completions/min_terminated_length": 28.0,
	"entropy": 0.1443817839026451,
	"epoch": 1.5882352941176472,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.42829352617263794,
	"learning_rate": 4.7450980392156857e-07,
	"loss": -0.0,
	"num_tokens": 21054656.0,
	"reward": 0.6639102697372437,
	"reward_std": 0.20781482756137848,
	"rewards/rna_reward_fn/mean": 0.6639102697372437,
	"rewards/rna_reward_fn/std": 0.3437131941318512,
	"step": 135
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 175.03125,
	"completions/mean_terminated_length": 175.03125,
	"completions/min_length": 28.0,
	"completions/min_terminated_length": 28.0,
	"entropy": 0.15896137803792953,
	"epoch": 1.6,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5342750549316406,
	"learning_rate": 4.705882352941176e-07,
	"loss": 0.0,
	"num_tokens": 21234912.0,
	"reward": 0.6274444460868835,
	"reward_std": 0.22071924805641174,
	"rewards/rna_reward_fn/mean": 0.6274445056915283,
	"rewards/rna_reward_fn/std": 0.3473777174949646,
	"step": 136
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 398.0,
	"completions/max_terminated_length": 398.0,
	"completions/mean_length": 143.65625,
	"completions/mean_terminated_length": 143.65625,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.15408551692962646,
	"epoch": 1.611764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.49438202381134033,
	"learning_rate": 4.6666666666666666e-07,
	"loss": 0.0,
	"num_tokens": 21383040.0,
	"reward": 0.6316537857055664,
	"reward_std": 0.1621330976486206,
	"rewards/rna_reward_fn/mean": 0.6316537857055664,
	"rewards/rna_reward_fn/std": 0.34947502613067627,
	"step": 137
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 392.0,
	"completions/max_terminated_length": 392.0,
	"completions/mean_length": 168.84375,
	"completions/mean_terminated_length": 168.84375,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.17249725759029388,
	"epoch": 1.6235294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5168977379798889,
	"learning_rate": 4.627450980392157e-07,
	"loss": -0.0,
	"num_tokens": 21556960.0,
	"reward": 0.7472211122512817,
	"reward_std": 0.16369092464447021,
	"rewards/rna_reward_fn/mean": 0.7472211122512817,
	"rewards/rna_reward_fn/std": 0.27173811197280884,
	"step": 138
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 157.1875,
	"completions/mean_terminated_length": 157.1875,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.16690535098314285,
	"epoch": 1.6352941176470588,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5558773875236511,
	"learning_rate": 4.5882352941176465e-07,
	"loss": 0.0,
	"num_tokens": 21718944.0,
	"reward": 0.6854004859924316,
	"reward_std": 0.19929495453834534,
	"rewards/rna_reward_fn/mean": 0.6854004859924316,
	"rewards/rna_reward_fn/std": 0.31646665930747986,
	"step": 139
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 152.59375,
	"completions/mean_terminated_length": 152.59375,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.1484585627913475,
	"epoch": 1.6470588235294117,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.9384368062019348,
	"learning_rate": 4.549019607843137e-07,
	"loss": -0.0,
	"num_tokens": 21876224.0,
	"reward": 0.6835744380950928,
	"reward_std": 0.1949320137500763,
	"rewards/rna_reward_fn/mean": 0.6835744380950928,
	"rewards/rna_reward_fn/std": 0.35554417967796326,
	"step": 140
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 400.0,
	"completions/max_terminated_length": 400.0,
	"completions/mean_length": 127.875,
	"completions/mean_terminated_length": 127.875,
	"completions/min_length": 19.0,
	"completions/min_terminated_length": 19.0,
	"entropy": 0.14056292921304703,
	"epoch": 1.6588235294117646,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4758838713169098,
	"learning_rate": 4.5098039215686274e-07,
	"loss": 0.0,
	"num_tokens": 22008192.0,
	"reward": 0.7035012245178223,
	"reward_std": 0.18292057514190674,
	"rewards/rna_reward_fn/mean": 0.703501284122467,
	"rewards/rna_reward_fn/std": 0.29926764965057373,
	"step": 141
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 164.59375,
	"completions/mean_terminated_length": 164.59375,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.1475282907485962,
	"epoch": 1.6705882352941175,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5269675254821777,
	"learning_rate": 4.470588235294118e-07,
	"loss": -0.0,
	"num_tokens": 22177760.0,
	"reward": 0.724274754524231,
	"reward_std": 0.20411115884780884,
	"rewards/rna_reward_fn/mean": 0.724274754524231,
	"rewards/rna_reward_fn/std": 0.29461607336997986,
	"step": 142
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 166.09375,
	"completions/mean_terminated_length": 166.09375,
	"completions/min_length": 18.0,
	"completions/min_terminated_length": 18.0,
	"entropy": 0.14830049872398376,
	"epoch": 1.6823529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5128397345542908,
	"learning_rate": 4.4313725490196073e-07,
	"loss": 0.0,
	"num_tokens": 22348864.0,
	"reward": 0.6864579916000366,
	"reward_std": 0.18042539060115814,
	"rewards/rna_reward_fn/mean": 0.6864579916000366,
	"rewards/rna_reward_fn/std": 0.3156171441078186,
	"step": 143
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 358.0,
	"completions/max_terminated_length": 358.0,
	"completions/mean_length": 121.21875,
	"completions/mean_terminated_length": 121.21875,
	"completions/min_length": 38.0,
	"completions/min_terminated_length": 38.0,
	"entropy": 0.14306584745645523,
	"epoch": 1.6941176470588235,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4526241421699524,
	"learning_rate": 4.392156862745098e-07,
	"loss": 0.0,
	"num_tokens": 22474016.0,
	"reward": 0.6906402111053467,
	"reward_std": 0.2201388031244278,
	"rewards/rna_reward_fn/mean": 0.6906402111053467,
	"rewards/rna_reward_fn/std": 0.3415301740169525,
	"step": 144
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 398.0,
	"completions/max_terminated_length": 398.0,
	"completions/mean_length": 111.0625,
	"completions/mean_terminated_length": 111.0625,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.14087412506341934,
	"epoch": 1.7058823529411766,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4583019018173218,
	"learning_rate": 4.352941176470588e-07,
	"loss": 0.0,
	"num_tokens": 22588768.0,
	"reward": 0.7702864408493042,
	"reward_std": 0.1817162036895752,
	"rewards/rna_reward_fn/mean": 0.7702864408493042,
	"rewards/rna_reward_fn/std": 0.28576594591140747,
	"step": 145
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 152.46875,
	"completions/mean_terminated_length": 152.46875,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.13646821677684784,
	"epoch": 1.7176470588235295,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5821676850318909,
	"learning_rate": 4.313725490196078e-07,
	"loss": -0.0,
	"num_tokens": 22745920.0,
	"reward": 0.6735475659370422,
	"reward_std": 0.2079792022705078,
	"rewards/rna_reward_fn/mean": 0.6735475659370422,
	"rewards/rna_reward_fn/std": 0.34127116203308105,
	"step": 146
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 398.0,
	"completions/max_terminated_length": 398.0,
	"completions/mean_length": 137.0625,
	"completions/mean_terminated_length": 137.0625,
	"completions/min_length": 17.0,
	"completions/min_terminated_length": 17.0,
	"entropy": 0.1294446587562561,
	"epoch": 1.7294117647058824,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.47053244709968567,
	"learning_rate": 4.274509803921568e-07,
	"loss": 0.0,
	"num_tokens": 22887296.0,
	"reward": 0.7310217618942261,
	"reward_std": 0.16372641921043396,
	"rewards/rna_reward_fn/mean": 0.7310217618942261,
	"rewards/rna_reward_fn/std": 0.29399389028549194,
	"step": 147
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 164.03125,
	"completions/mean_terminated_length": 164.03125,
	"completions/min_length": 16.0,
	"completions/min_terminated_length": 16.0,
	"entropy": 0.16281016170978546,
	"epoch": 1.7411764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5588626861572266,
	"learning_rate": 4.2352941176470586e-07,
	"loss": 0.0,
	"num_tokens": 23056288.0,
	"reward": 0.654833197593689,
	"reward_std": 0.1884084939956665,
	"rewards/rna_reward_fn/mean": 0.654833197593689,
	"rewards/rna_reward_fn/std": 0.3517378270626068,
	"step": 148
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 140.84375,
	"completions/mean_terminated_length": 140.84375,
	"completions/min_length": 20.0,
	"completions/min_terminated_length": 20.0,
	"entropy": 0.15908341854810715,
	"epoch": 1.7529411764705882,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5507121086120605,
	"learning_rate": 4.196078431372549e-07,
	"loss": 0.0,
	"num_tokens": 23201536.0,
	"reward": 0.699113667011261,
	"reward_std": 0.20187973976135254,
	"rewards/rna_reward_fn/mean": 0.699113667011261,
	"rewards/rna_reward_fn/std": 0.3249177634716034,
	"step": 149
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 400.0,
	"completions/max_terminated_length": 400.0,
	"completions/mean_length": 192.4375,
	"completions/mean_terminated_length": 192.4375,
	"completions/min_length": 40.0,
	"completions/min_terminated_length": 40.0,
	"entropy": 0.15749355405569077,
	"epoch": 1.7647058823529411,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.47758468985557556,
	"learning_rate": 4.156862745098039e-07,
	"loss": 0.0,
	"num_tokens": 23399616.0,
	"reward": 0.6602087020874023,
	"reward_std": 0.2426632046699524,
	"rewards/rna_reward_fn/mean": 0.6602087020874023,
	"rewards/rna_reward_fn/std": 0.3394790291786194,
	"step": 150
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 186.375,
	"completions/mean_terminated_length": 186.375,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.1590714380145073,
	"epoch": 1.776470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5084402561187744,
	"learning_rate": 4.117647058823529e-07,
	"loss": 0.0,
	"num_tokens": 23591488.0,
	"reward": 0.6650402545928955,
	"reward_std": 0.18303653597831726,
	"rewards/rna_reward_fn/mean": 0.6650401949882507,
	"rewards/rna_reward_fn/std": 0.33965203166007996,
	"step": 151
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 141.40625,
	"completions/mean_terminated_length": 141.40625,
	"completions/min_length": 18.0,
	"completions/min_terminated_length": 18.0,
	"entropy": 0.14213567227125168,
	"epoch": 1.788235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5413779020309448,
	"learning_rate": 4.0784313725490194e-07,
	"loss": -0.0,
	"num_tokens": 23737312.0,
	"reward": 0.6437839865684509,
	"reward_std": 0.2132418155670166,
	"rewards/rna_reward_fn/mean": 0.6437839865684509,
	"rewards/rna_reward_fn/std": 0.3476622402667999,
	"step": 152
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 140.75,
	"completions/mean_terminated_length": 140.75,
	"completions/min_length": 30.0,
	"completions/min_terminated_length": 30.0,
	"entropy": 0.14729295670986176,
	"epoch": 1.8,
	"frac_reward_zero_std": 0.03125,
	"grad_norm": 0.48154816031455994,
	"learning_rate": 4.03921568627451e-07,
	"loss": -0.0,
	"num_tokens": 23882464.0,
	"reward": 0.6620033979415894,
	"reward_std": 0.22405345737934113,
	"rewards/rna_reward_fn/mean": 0.6620033979415894,
	"rewards/rna_reward_fn/std": 0.3390491306781769,
	"step": 153
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 166.46875,
	"completions/mean_terminated_length": 166.46875,
	"completions/min_length": 26.0,
	"completions/min_terminated_length": 26.0,
	"entropy": 0.14903101325035095,
	"epoch": 1.811764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.625751793384552,
	"learning_rate": 4e-07,
	"loss": -0.0,
	"num_tokens": 24053952.0,
	"reward": 0.6442551612854004,
	"reward_std": 0.17395520210266113,
	"rewards/rna_reward_fn/mean": 0.6442551612854004,
	"rewards/rna_reward_fn/std": 0.3670194745063782,
	"step": 154
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 157.90625,
	"completions/mean_terminated_length": 157.90625,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.15323904901742935,
	"epoch": 1.8235294117647058,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.48200494050979614,
	"learning_rate": 3.96078431372549e-07,
	"loss": -0.0,
	"num_tokens": 24216672.0,
	"reward": 0.6359031200408936,
	"reward_std": 0.17717690765857697,
	"rewards/rna_reward_fn/mean": 0.6359031200408936,
	"rewards/rna_reward_fn/std": 0.32817214727401733,
	"step": 155
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 472.0,
	"completions/max_terminated_length": 472.0,
	"completions/mean_length": 145.8125,
	"completions/mean_terminated_length": 145.8125,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.1613752394914627,
	"epoch": 1.835294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.46832966804504395,
	"learning_rate": 3.92156862745098e-07,
	"loss": 0.0,
	"num_tokens": 24367008.0,
	"reward": 0.7130154371261597,
	"reward_std": 0.18193909525871277,
	"rewards/rna_reward_fn/mean": 0.7130154371261597,
	"rewards/rna_reward_fn/std": 0.3411928117275238,
	"step": 156
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 142.46875,
	"completions/mean_terminated_length": 142.46875,
	"completions/min_length": 15.0,
	"completions/min_terminated_length": 15.0,
	"entropy": 0.13961906731128693,
	"epoch": 1.8470588235294119,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6261844038963318,
	"learning_rate": 3.8823529411764707e-07,
	"loss": -0.0,
	"num_tokens": 24513920.0,
	"reward": 0.711245596408844,
	"reward_std": 0.1767653077840805,
	"rewards/rna_reward_fn/mean": 0.7112456560134888,
	"rewards/rna_reward_fn/std": 0.3348366618156433,
	"step": 157
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 412.0,
	"completions/max_terminated_length": 412.0,
	"completions/mean_length": 152.4375,
	"completions/mean_terminated_length": 152.4375,
	"completions/min_length": 39.0,
	"completions/min_terminated_length": 39.0,
	"entropy": 0.1567898690700531,
	"epoch": 1.8588235294117648,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5200847387313843,
	"learning_rate": 3.8431372549019606e-07,
	"loss": -0.0,
	"num_tokens": 24671040.0,
	"reward": 0.7147434949874878,
	"reward_std": 0.14905846118927002,
	"rewards/rna_reward_fn/mean": 0.7147434949874878,
	"rewards/rna_reward_fn/std": 0.3070945739746094,
	"step": 158
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 392.0,
	"completions/max_terminated_length": 392.0,
	"completions/mean_length": 125.71875,
	"completions/mean_terminated_length": 125.71875,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.133110411465168,
	"epoch": 1.8705882352941177,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4239906370639801,
	"learning_rate": 3.8039215686274506e-07,
	"loss": 0.0,
	"num_tokens": 24800800.0,
	"reward": 0.640139639377594,
	"reward_std": 0.20033451914787292,
	"rewards/rna_reward_fn/mean": 0.640139639377594,
	"rewards/rna_reward_fn/std": 0.3294910490512848,
	"step": 159
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 376.0,
	"completions/max_terminated_length": 376.0,
	"completions/mean_length": 134.8125,
	"completions/mean_terminated_length": 134.8125,
	"completions/min_length": 26.0,
	"completions/min_terminated_length": 26.0,
	"entropy": 0.12187084183096886,
	"epoch": 1.8823529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.38697147369384766,
	"learning_rate": 3.764705882352941e-07,
	"loss": -0.0,
	"num_tokens": 24939872.0,
	"reward": 0.6659330725669861,
	"reward_std": 0.16438628733158112,
	"rewards/rna_reward_fn/mean": 0.6659330725669861,
	"rewards/rna_reward_fn/std": 0.35713815689086914,
	"step": 160
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 135.5625,
	"completions/mean_terminated_length": 135.5625,
	"completions/min_length": 18.0,
	"completions/min_terminated_length": 18.0,
	"entropy": 0.13703680038452148,
	"epoch": 1.8941176470588235,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4564237594604492,
	"learning_rate": 3.7254901960784315e-07,
	"loss": 0.0,
	"num_tokens": 25079712.0,
	"reward": 0.6596216559410095,
	"reward_std": 0.20437049865722656,
	"rewards/rna_reward_fn/mean": 0.6596216559410095,
	"rewards/rna_reward_fn/std": 0.3517865240573883,
	"step": 161
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 177.0625,
	"completions/mean_terminated_length": 177.0625,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.15036547183990479,
	"epoch": 1.9058823529411764,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.45348137617111206,
	"learning_rate": 3.6862745098039214e-07,
	"loss": -0.0,
	"num_tokens": 25262048.0,
	"reward": 0.6836435198783875,
	"reward_std": 0.20624709129333496,
	"rewards/rna_reward_fn/mean": 0.6836435198783875,
	"rewards/rna_reward_fn/std": 0.32797813415527344,
	"step": 162
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 141.71875,
	"completions/mean_terminated_length": 141.71875,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.14257021248340607,
	"epoch": 1.9176470588235293,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4581199586391449,
	"learning_rate": 3.6470588235294114e-07,
	"loss": -0.0,
	"num_tokens": 25408192.0,
	"reward": 0.6231480836868286,
	"reward_std": 0.20732316374778748,
	"rewards/rna_reward_fn/mean": 0.6231480836868286,
	"rewards/rna_reward_fn/std": 0.35448968410491943,
	"step": 163
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 389.0,
	"completions/max_terminated_length": 389.0,
	"completions/mean_length": 103.90625,
	"completions/mean_terminated_length": 103.90625,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.11931119486689568,
	"epoch": 1.9294117647058824,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.42869991064071655,
	"learning_rate": 3.607843137254902e-07,
	"loss": -0.0,
	"num_tokens": 25515616.0,
	"reward": 0.7718137502670288,
	"reward_std": 0.15544265508651733,
	"rewards/rna_reward_fn/mean": 0.7718137502670288,
	"rewards/rna_reward_fn/std": 0.2820202112197876,
	"step": 164
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 399.0,
	"completions/max_terminated_length": 399.0,
	"completions/mean_length": 118.34375,
	"completions/mean_terminated_length": 118.34375,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.13630840182304382,
	"epoch": 1.9411764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4796566069126129,
	"learning_rate": 3.5686274509803923e-07,
	"loss": 0.0,
	"num_tokens": 25637824.0,
	"reward": 0.7639800310134888,
	"reward_std": 0.16217514872550964,
	"rewards/rna_reward_fn/mean": 0.7639800310134888,
	"rewards/rna_reward_fn/std": 0.2800072729587555,
	"step": 165
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 492.0,
	"completions/max_terminated_length": 492.0,
	"completions/mean_length": 196.1875,
	"completions/mean_terminated_length": 196.1875,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.1692701205611229,
	"epoch": 1.9529411764705882,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.576678991317749,
	"learning_rate": 3.529411764705882e-07,
	"loss": 0.0,
	"num_tokens": 25839744.0,
	"reward": 0.62703537940979,
	"reward_std": 0.24643635749816895,
	"rewards/rna_reward_fn/mean": 0.62703537940979,
	"rewards/rna_reward_fn/std": 0.3669246435165405,
	"step": 166
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 167.96875,
	"completions/mean_terminated_length": 167.96875,
	"completions/min_length": 32.0,
	"completions/min_terminated_length": 32.0,
	"entropy": 0.16024480760097504,
	"epoch": 1.9647058823529413,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7311699390411377,
	"learning_rate": 3.490196078431372e-07,
	"loss": 0.0,
	"num_tokens": 26012768.0,
	"reward": 0.6588948369026184,
	"reward_std": 0.1576000452041626,
	"rewards/rna_reward_fn/mean": 0.6588948965072632,
	"rewards/rna_reward_fn/std": 0.32907265424728394,
	"step": 167
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 201.5,
	"completions/mean_terminated_length": 201.5,
	"completions/min_length": 48.0,
	"completions/min_terminated_length": 48.0,
	"entropy": 0.1511036530137062,
	"epoch": 1.9764705882352942,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4694945216178894,
	"learning_rate": 3.4509803921568627e-07,
	"loss": 0.0,
	"num_tokens": 26220128.0,
	"reward": 0.6976197957992554,
	"reward_std": 0.19369524717330933,
	"rewards/rna_reward_fn/mean": 0.6976197957992554,
	"rewards/rna_reward_fn/std": 0.32611048221588135,
	"step": 168
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 400.0,
	"completions/max_terminated_length": 400.0,
	"completions/mean_length": 154.5,
	"completions/mean_terminated_length": 154.5,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.15085221827030182,
	"epoch": 1.988235294117647,
	"frac_reward_zero_std": 0.03125,
	"grad_norm": 0.7034254670143127,
	"learning_rate": 3.411764705882353e-07,
	"loss": 0.0,
	"num_tokens": 26379360.0,
	"reward": 0.6942508220672607,
	"reward_std": 0.20178331434726715,
	"rewards/rna_reward_fn/mean": 0.6942508220672607,
	"rewards/rna_reward_fn/std": 0.31030499935150146,
	"step": 169
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 490.0,
	"completions/max_terminated_length": 490.0,
	"completions/mean_length": 160.53125,
	"completions/mean_terminated_length": 160.53125,
	"completions/min_length": 31.0,
	"completions/min_terminated_length": 31.0,
	"entropy": 0.15548591315746307,
	"epoch": 2.0,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5434289574623108,
	"learning_rate": 3.372549019607843e-07,
	"loss": -0.0,
	"num_tokens": 26544768.0,
	"reward": 0.6601583957672119,
	"reward_std": 0.15550854802131653,
	"rewards/rna_reward_fn/mean": 0.6601583361625671,
	"rewards/rna_reward_fn/std": 0.3311554193496704,
	"step": 170
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 160.40625,
	"completions/mean_terminated_length": 160.40625,
	"completions/min_length": 18.0,
	"completions/min_terminated_length": 18.0,
	"entropy": 0.1544594094157219,
	"epoch": 2.011764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6815203428268433,
	"learning_rate": 3.333333333333333e-07,
	"loss": 0.0,
	"num_tokens": 26710048.0,
	"reward": 0.5972940921783447,
	"reward_std": 0.18555977940559387,
	"rewards/rna_reward_fn/mean": 0.5972940921783447,
	"rewards/rna_reward_fn/std": 0.36445632576942444,
	"step": 171
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 157.40625,
	"completions/mean_terminated_length": 157.40625,
	"completions/min_length": 23.0,
	"completions/min_terminated_length": 23.0,
	"entropy": 0.14051128178834915,
	"epoch": 2.023529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5093562602996826,
	"learning_rate": 3.2941176470588235e-07,
	"loss": 0.0,
	"num_tokens": 26872256.0,
	"reward": 0.6649138927459717,
	"reward_std": 0.2001783400774002,
	"rewards/rna_reward_fn/mean": 0.6649138331413269,
	"rewards/rna_reward_fn/std": 0.3582386374473572,
	"step": 172
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 173.75,
	"completions/mean_terminated_length": 173.75,
	"completions/min_length": 39.0,
	"completions/min_terminated_length": 39.0,
	"entropy": 0.14279819279909134,
	"epoch": 2.0352941176470587,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4454724192619324,
	"learning_rate": 3.2549019607843134e-07,
	"loss": -0.0,
	"num_tokens": 27051200.0,
	"reward": 0.7748029828071594,
	"reward_std": 0.14138856530189514,
	"rewards/rna_reward_fn/mean": 0.7748030424118042,
	"rewards/rna_reward_fn/std": 0.2777082026004791,
	"step": 173
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 165.8125,
	"completions/mean_terminated_length": 165.8125,
	"completions/min_length": 38.0,
	"completions/min_terminated_length": 38.0,
	"entropy": 0.13190212100744247,
	"epoch": 2.0470588235294116,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4604037404060364,
	"learning_rate": 3.215686274509804e-07,
	"loss": 0.0,
	"num_tokens": 27222016.0,
	"reward": 0.6792135238647461,
	"reward_std": 0.17050443589687347,
	"rewards/rna_reward_fn/mean": 0.6792135834693909,
	"rewards/rna_reward_fn/std": 0.3469991087913513,
	"step": 174
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 140.21875,
	"completions/mean_terminated_length": 140.21875,
	"completions/min_length": 30.0,
	"completions/min_terminated_length": 30.0,
	"entropy": 0.11882514134049416,
	"epoch": 2.0588235294117645,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.42415928840637207,
	"learning_rate": 3.176470588235294e-07,
	"loss": -0.0,
	"num_tokens": 27366624.0,
	"reward": 0.618835985660553,
	"reward_std": 0.19730809330940247,
	"rewards/rna_reward_fn/mean": 0.6188360452651978,
	"rewards/rna_reward_fn/std": 0.3514353334903717,
	"step": 175
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 154.25,
	"completions/mean_terminated_length": 154.25,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.12727607041597366,
	"epoch": 2.070588235294118,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5830354690551758,
	"learning_rate": 3.1372549019607843e-07,
	"loss": 0.0,
	"num_tokens": 27525600.0,
	"reward": 0.6785444617271423,
	"reward_std": 0.18948182463645935,
	"rewards/rna_reward_fn/mean": 0.6785444617271423,
	"rewards/rna_reward_fn/std": 0.3351566791534424,
	"step": 176
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 147.78125,
	"completions/mean_terminated_length": 147.78125,
	"completions/min_length": 23.0,
	"completions/min_terminated_length": 23.0,
	"entropy": 0.14719800651073456,
	"epoch": 2.0823529411764707,
	"frac_reward_zero_std": 0.03125,
	"grad_norm": 0.4794676899909973,
	"learning_rate": 3.098039215686274e-07,
	"loss": 0.0,
	"num_tokens": 27677952.0,
	"reward": 0.7077100276947021,
	"reward_std": 0.1931176781654358,
	"rewards/rna_reward_fn/mean": 0.7077100276947021,
	"rewards/rna_reward_fn/std": 0.3137640357017517,
	"step": 177
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 142.46875,
	"completions/mean_terminated_length": 142.46875,
	"completions/min_length": 16.0,
	"completions/min_terminated_length": 16.0,
	"entropy": 0.15307611972093582,
	"epoch": 2.0941176470588236,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6268736720085144,
	"learning_rate": 3.0588235294117647e-07,
	"loss": 0.0,
	"num_tokens": 27824864.0,
	"reward": 0.7079458236694336,
	"reward_std": 0.2219894826412201,
	"rewards/rna_reward_fn/mean": 0.7079458236694336,
	"rewards/rna_reward_fn/std": 0.3472329080104828,
	"step": 178
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 164.0,
	"completions/mean_terminated_length": 164.0,
	"completions/min_length": 36.0,
	"completions/min_terminated_length": 36.0,
	"entropy": 0.13749201595783234,
	"epoch": 2.1058823529411765,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5293802618980408,
	"learning_rate": 3.0196078431372546e-07,
	"loss": 0.0,
	"num_tokens": 27993824.0,
	"reward": 0.6385776996612549,
	"reward_std": 0.2456386685371399,
	"rewards/rna_reward_fn/mean": 0.6385776996612549,
	"rewards/rna_reward_fn/std": 0.36081886291503906,
	"step": 179
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 140.21875,
	"completions/mean_terminated_length": 140.21875,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.1387496143579483,
	"epoch": 2.1176470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.538530707359314,
	"learning_rate": 2.980392156862745e-07,
	"loss": -0.0,
	"num_tokens": 28138432.0,
	"reward": 0.6739398241043091,
	"reward_std": 0.21720820665359497,
	"rewards/rna_reward_fn/mean": 0.6739398837089539,
	"rewards/rna_reward_fn/std": 0.30697187781333923,
	"step": 180
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 118.65625,
	"completions/mean_terminated_length": 118.65625,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.11488081514835358,
	"epoch": 2.1294117647058823,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.42285630106925964,
	"learning_rate": 2.941176470588235e-07,
	"loss": -0.0,
	"num_tokens": 28260960.0,
	"reward": 0.7317262887954712,
	"reward_std": 0.20456328988075256,
	"rewards/rna_reward_fn/mean": 0.7317262887954712,
	"rewards/rna_reward_fn/std": 0.2935360074043274,
	"step": 181
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 384.0,
	"completions/max_terminated_length": 384.0,
	"completions/mean_length": 128.8125,
	"completions/mean_terminated_length": 128.8125,
	"completions/min_length": 31.0,
	"completions/min_terminated_length": 31.0,
	"entropy": 0.13038966059684753,
	"epoch": 2.1411764705882352,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.43837785720825195,
	"learning_rate": 2.9019607843137255e-07,
	"loss": 0.0,
	"num_tokens": 28393888.0,
	"reward": 0.7334122657775879,
	"reward_std": 0.1874283403158188,
	"rewards/rna_reward_fn/mean": 0.7334122657775879,
	"rewards/rna_reward_fn/std": 0.3205217123031616,
	"step": 182
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 142.1875,
	"completions/mean_terminated_length": 142.1875,
	"completions/min_length": 28.0,
	"completions/min_terminated_length": 28.0,
	"entropy": 0.142289437353611,
	"epoch": 2.152941176470588,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4689069092273712,
	"learning_rate": 2.8627450980392154e-07,
	"loss": -0.0,
	"num_tokens": 28540512.0,
	"reward": 0.738664448261261,
	"reward_std": 0.16794101893901825,
	"rewards/rna_reward_fn/mean": 0.7386645078659058,
	"rewards/rna_reward_fn/std": 0.30475351214408875,
	"step": 183
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 150.1875,
	"completions/mean_terminated_length": 150.1875,
	"completions/min_length": 17.0,
	"completions/min_terminated_length": 17.0,
	"entropy": 0.13591318577528,
	"epoch": 2.164705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.48003292083740234,
	"learning_rate": 2.823529411764706e-07,
	"loss": -0.0,
	"num_tokens": 28695328.0,
	"reward": 0.6993162631988525,
	"reward_std": 0.1979941427707672,
	"rewards/rna_reward_fn/mean": 0.6993162035942078,
	"rewards/rna_reward_fn/std": 0.31292685866355896,
	"step": 184
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 472.0,
	"completions/max_terminated_length": 472.0,
	"completions/mean_length": 173.65625,
	"completions/mean_terminated_length": 173.65625,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.15518562495708466,
	"epoch": 2.176470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6343421339988708,
	"learning_rate": 2.784313725490196e-07,
	"loss": -0.0,
	"num_tokens": 28874176.0,
	"reward": 0.7311723232269287,
	"reward_std": 0.2127300500869751,
	"rewards/rna_reward_fn/mean": 0.7311723232269287,
	"rewards/rna_reward_fn/std": 0.3124001622200012,
	"step": 185
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 137.5625,
	"completions/mean_terminated_length": 137.5625,
	"completions/min_length": 36.0,
	"completions/min_terminated_length": 36.0,
	"entropy": 0.1409146785736084,
	"epoch": 2.1882352941176473,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.46661409735679626,
	"learning_rate": 2.7450980392156863e-07,
	"loss": -0.0,
	"num_tokens": 29016064.0,
	"reward": 0.7118009328842163,
	"reward_std": 0.16496126353740692,
	"rewards/rna_reward_fn/mean": 0.7118009328842163,
	"rewards/rna_reward_fn/std": 0.32205572724342346,
	"step": 186
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 151.21875,
	"completions/mean_terminated_length": 151.21875,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.14989649504423141,
	"epoch": 2.2,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.44188031554222107,
	"learning_rate": 2.705882352941176e-07,
	"loss": -0.0,
	"num_tokens": 29171936.0,
	"reward": 0.7327808141708374,
	"reward_std": 0.17523989081382751,
	"rewards/rna_reward_fn/mean": 0.7327808141708374,
	"rewards/rna_reward_fn/std": 0.32806655764579773,
	"step": 187
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 389.0,
	"completions/max_terminated_length": 389.0,
	"completions/mean_length": 157.84375,
	"completions/mean_terminated_length": 157.84375,
	"completions/min_length": 19.0,
	"completions/min_terminated_length": 19.0,
	"entropy": 0.14322884380817413,
	"epoch": 2.211764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5148700475692749,
	"learning_rate": 2.6666666666666667e-07,
	"loss": -0.0,
	"num_tokens": 29334592.0,
	"reward": 0.6917252540588379,
	"reward_std": 0.17680642008781433,
	"rewards/rna_reward_fn/mean": 0.6917252540588379,
	"rewards/rna_reward_fn/std": 0.30800244212150574,
	"step": 188
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 164.0,
	"completions/mean_terminated_length": 164.0,
	"completions/min_length": 16.0,
	"completions/min_terminated_length": 16.0,
	"entropy": 0.14842171967029572,
	"epoch": 2.223529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5274482369422913,
	"learning_rate": 2.6274509803921567e-07,
	"loss": 0.0,
	"num_tokens": 29503552.0,
	"reward": 0.7333264350891113,
	"reward_std": 0.17190617322921753,
	"rewards/rna_reward_fn/mean": 0.7333264350891113,
	"rewards/rna_reward_fn/std": 0.26974406838417053,
	"step": 189
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 167.875,
	"completions/mean_terminated_length": 167.875,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.12728291004896164,
	"epoch": 2.235294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4334995746612549,
	"learning_rate": 2.588235294117647e-07,
	"loss": -0.0,
	"num_tokens": 29676480.0,
	"reward": 0.6551768779754639,
	"reward_std": 0.18493275344371796,
	"rewards/rna_reward_fn/mean": 0.6551768779754639,
	"rewards/rna_reward_fn/std": 0.33756914734840393,
	"step": 190
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 142.59375,
	"completions/mean_terminated_length": 142.59375,
	"completions/min_length": 34.0,
	"completions/min_terminated_length": 34.0,
	"entropy": 0.13632921129465103,
	"epoch": 2.2470588235294118,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5202718377113342,
	"learning_rate": 2.549019607843137e-07,
	"loss": -0.0,
	"num_tokens": 29823520.0,
	"reward": 0.779222309589386,
	"reward_std": 0.1619720160961151,
	"rewards/rna_reward_fn/mean": 0.779222309589386,
	"rewards/rna_reward_fn/std": 0.255502849817276,
	"step": 191
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 381.0,
	"completions/max_terminated_length": 381.0,
	"completions/mean_length": 141.8125,
	"completions/mean_terminated_length": 141.8125,
	"completions/min_length": 34.0,
	"completions/min_terminated_length": 34.0,
	"entropy": 0.1468304842710495,
	"epoch": 2.2588235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4959217309951782,
	"learning_rate": 2.5098039215686275e-07,
	"loss": 0.0,
	"num_tokens": 29969760.0,
	"reward": 0.6328116655349731,
	"reward_std": 0.20429277420043945,
	"rewards/rna_reward_fn/mean": 0.6328116655349731,
	"rewards/rna_reward_fn/std": 0.3653068244457245,
	"step": 192
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 472.0,
	"completions/max_terminated_length": 472.0,
	"completions/mean_length": 147.03125,
	"completions/mean_terminated_length": 147.03125,
	"completions/min_length": 24.0,
	"completions/min_terminated_length": 24.0,
	"entropy": 0.14507943391799927,
	"epoch": 2.2705882352941176,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.46249526739120483,
	"learning_rate": 2.4705882352941175e-07,
	"loss": -0.0,
	"num_tokens": 30121344.0,
	"reward": 0.6946768760681152,
	"reward_std": 0.16386722028255463,
	"rewards/rna_reward_fn/mean": 0.6946768760681152,
	"rewards/rna_reward_fn/std": 0.3166311979293823,
	"step": 193
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 347.0,
	"completions/max_terminated_length": 347.0,
	"completions/mean_length": 119.1875,
	"completions/mean_terminated_length": 119.1875,
	"completions/min_length": 34.0,
	"completions/min_terminated_length": 34.0,
	"entropy": 0.1289873719215393,
	"epoch": 2.2823529411764705,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.43832215666770935,
	"learning_rate": 2.431372549019608e-07,
	"loss": -0.0,
	"num_tokens": 30244416.0,
	"reward": 0.7309268116950989,
	"reward_std": 0.16351744532585144,
	"rewards/rna_reward_fn/mean": 0.7309267520904541,
	"rewards/rna_reward_fn/std": 0.27468279004096985,
	"step": 194
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 132.40625,
	"completions/mean_terminated_length": 132.40625,
	"completions/min_length": 15.0,
	"completions/min_terminated_length": 15.0,
	"entropy": 0.14909712970256805,
	"epoch": 2.2941176470588234,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4866437613964081,
	"learning_rate": 2.392156862745098e-07,
	"loss": -0.0,
	"num_tokens": 30381024.0,
	"reward": 0.6669021844863892,
	"reward_std": 0.19414769113063812,
	"rewards/rna_reward_fn/mean": 0.6669021844863892,
	"rewards/rna_reward_fn/std": 0.3391817808151245,
	"step": 195
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 174.0,
	"completions/mean_terminated_length": 174.0,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.14798294007778168,
	"epoch": 2.3058823529411763,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.590640127658844,
	"learning_rate": 2.352941176470588e-07,
	"loss": -0.0,
	"num_tokens": 30560224.0,
	"reward": 0.6385676860809326,
	"reward_std": 0.20142759382724762,
	"rewards/rna_reward_fn/mean": 0.6385676860809326,
	"rewards/rna_reward_fn/std": 0.34272608160972595,
	"step": 196
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 125.125,
	"completions/mean_terminated_length": 125.125,
	"completions/min_length": 19.0,
	"completions/min_terminated_length": 19.0,
	"entropy": 0.1469191089272499,
	"epoch": 2.317647058823529,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4721366763114929,
	"learning_rate": 2.3137254901960785e-07,
	"loss": -0.0,
	"num_tokens": 30689376.0,
	"reward": 0.7269188165664673,
	"reward_std": 0.19917072355747223,
	"rewards/rna_reward_fn/mean": 0.7269188165664673,
	"rewards/rna_reward_fn/std": 0.3235536217689514,
	"step": 197
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 170.21875,
	"completions/mean_terminated_length": 170.21875,
	"completions/min_length": 38.0,
	"completions/min_terminated_length": 38.0,
	"entropy": 0.1481616050004959,
	"epoch": 2.3294117647058825,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4824952483177185,
	"learning_rate": 2.2745098039215685e-07,
	"loss": 0.0,
	"num_tokens": 30864704.0,
	"reward": 0.7315170764923096,
	"reward_std": 0.19473856687545776,
	"rewards/rna_reward_fn/mean": 0.7315171360969543,
	"rewards/rna_reward_fn/std": 0.31163889169692993,
	"step": 198
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 124.21875,
	"completions/mean_terminated_length": 124.21875,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.11309440433979034,
	"epoch": 2.3411764705882354,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.43292057514190674,
	"learning_rate": 2.235294117647059e-07,
	"loss": -0.0,
	"num_tokens": 30992928.0,
	"reward": 0.6969711184501648,
	"reward_std": 0.18462812900543213,
	"rewards/rna_reward_fn/mean": 0.6969711780548096,
	"rewards/rna_reward_fn/std": 0.30229660868644714,
	"step": 199
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 295.0,
	"completions/max_terminated_length": 295.0,
	"completions/mean_length": 115.625,
	"completions/mean_terminated_length": 115.625,
	"completions/min_length": 28.0,
	"completions/min_terminated_length": 28.0,
	"entropy": 0.1170443557202816,
	"epoch": 2.3529411764705883,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.42780736088752747,
	"learning_rate": 2.196078431372549e-07,
	"loss": 0.0,
	"num_tokens": 31112352.0,
	"reward": 0.7397186160087585,
	"reward_std": 0.16325643658638,
	"rewards/rna_reward_fn/mean": 0.7397185564041138,
	"rewards/rna_reward_fn/std": 0.2868645191192627,
	"step": 200
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 191.78125,
	"completions/mean_terminated_length": 191.78125,
	"completions/min_length": 20.0,
	"completions/min_terminated_length": 20.0,
	"entropy": 0.158894345164299,
	"epoch": 2.364705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5923020243644714,
	"learning_rate": 2.156862745098039e-07,
	"loss": 0.0,
	"num_tokens": 31309760.0,
	"reward": 0.713019609451294,
	"reward_std": 0.1600976586341858,
	"rewards/rna_reward_fn/mean": 0.7130196690559387,
	"rewards/rna_reward_fn/std": 0.3151859641075134,
	"step": 201
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 500.0,
	"completions/max_terminated_length": 500.0,
	"completions/mean_length": 167.15625,
	"completions/mean_terminated_length": 167.15625,
	"completions/min_length": 38.0,
	"completions/min_terminated_length": 38.0,
	"entropy": 0.15573827922344208,
	"epoch": 2.376470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5989984273910522,
	"learning_rate": 2.1176470588235293e-07,
	"loss": -0.0,
	"num_tokens": 31481952.0,
	"reward": 0.7245238423347473,
	"reward_std": 0.21510586142539978,
	"rewards/rna_reward_fn/mean": 0.7245238423347473,
	"rewards/rna_reward_fn/std": 0.3133554756641388,
	"step": 202
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 336.0,
	"completions/max_terminated_length": 336.0,
	"completions/mean_length": 147.15625,
	"completions/mean_terminated_length": 147.15625,
	"completions/min_length": 37.0,
	"completions/min_terminated_length": 37.0,
	"entropy": 0.14043358713388443,
	"epoch": 2.388235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.45242446660995483,
	"learning_rate": 2.0784313725490195e-07,
	"loss": 0.0,
	"num_tokens": 31633664.0,
	"reward": 0.6685344576835632,
	"reward_std": 0.19693541526794434,
	"rewards/rna_reward_fn/mean": 0.6685344576835632,
	"rewards/rna_reward_fn/std": 0.33878231048583984,
	"step": 203
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 160.78125,
	"completions/mean_terminated_length": 160.78125,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.14151378720998764,
	"epoch": 2.4,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.578268826007843,
	"learning_rate": 2.0392156862745097e-07,
	"loss": 0.0,
	"num_tokens": 31799328.0,
	"reward": 0.753953218460083,
	"reward_std": 0.14072492718696594,
	"rewards/rna_reward_fn/mean": 0.753953218460083,
	"rewards/rna_reward_fn/std": 0.323638916015625,
	"step": 204
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 116.71875,
	"completions/mean_terminated_length": 116.71875,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.14078038185834885,
	"epoch": 2.411764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5669292211532593,
	"learning_rate": 2e-07,
	"loss": 0.0,
	"num_tokens": 31919872.0,
	"reward": 0.7278470993041992,
	"reward_std": 0.18851059675216675,
	"rewards/rna_reward_fn/mean": 0.7278470993041992,
	"rewards/rna_reward_fn/std": 0.31520187854766846,
	"step": 205
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 165.1875,
	"completions/mean_terminated_length": 165.1875,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.1560438796877861,
	"epoch": 2.4235294117647057,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5335204005241394,
	"learning_rate": 1.96078431372549e-07,
	"loss": -0.0,
	"num_tokens": 32090048.0,
	"reward": 0.74782395362854,
	"reward_std": 0.16413238644599915,
	"rewards/rna_reward_fn/mean": 0.74782395362854,
	"rewards/rna_reward_fn/std": 0.27966901659965515,
	"step": 206
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 129.75,
	"completions/mean_terminated_length": 129.75,
	"completions/min_length": 32.0,
	"completions/min_terminated_length": 32.0,
	"entropy": 0.13756585866212845,
	"epoch": 2.435294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4791547358036041,
	"learning_rate": 1.9215686274509803e-07,
	"loss": -0.0,
	"num_tokens": 32223936.0,
	"reward": 0.7443541884422302,
	"reward_std": 0.20347487926483154,
	"rewards/rna_reward_fn/mean": 0.744354248046875,
	"rewards/rna_reward_fn/std": 0.2934330999851227,
	"step": 207
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 144.46875,
	"completions/mean_terminated_length": 144.46875,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.14090368151664734,
	"epoch": 2.447058823529412,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.48767152428627014,
	"learning_rate": 1.8823529411764705e-07,
	"loss": -0.0,
	"num_tokens": 32372896.0,
	"reward": 0.7094341516494751,
	"reward_std": 0.1646713763475418,
	"rewards/rna_reward_fn/mean": 0.7094341516494751,
	"rewards/rna_reward_fn/std": 0.31243574619293213,
	"step": 208
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 121.375,
	"completions/mean_terminated_length": 121.375,
	"completions/min_length": 19.0,
	"completions/min_terminated_length": 19.0,
	"entropy": 0.13812856376171112,
	"epoch": 2.458823529411765,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.43114832043647766,
	"learning_rate": 1.8431372549019607e-07,
	"loss": -0.0,
	"num_tokens": 32498208.0,
	"reward": 0.7636112570762634,
	"reward_std": 0.1354459822177887,
	"rewards/rna_reward_fn/mean": 0.7636112570762634,
	"rewards/rna_reward_fn/std": 0.2837965786457062,
	"step": 209
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 157.9375,
	"completions/mean_terminated_length": 157.9375,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.12325883284211159,
	"epoch": 2.4705882352941178,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7042959928512573,
	"learning_rate": 1.803921568627451e-07,
	"loss": -0.0,
	"num_tokens": 32660960.0,
	"reward": 0.685276985168457,
	"reward_std": 0.14444154500961304,
	"rewards/rna_reward_fn/mean": 0.685276985168457,
	"rewards/rna_reward_fn/std": 0.3264351785182953,
	"step": 210
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 398.0,
	"completions/max_terminated_length": 398.0,
	"completions/mean_length": 149.28125,
	"completions/mean_terminated_length": 149.28125,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.14060577005147934,
	"epoch": 2.4823529411764707,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7576245665550232,
	"learning_rate": 1.764705882352941e-07,
	"loss": 0.0,
	"num_tokens": 32814848.0,
	"reward": 0.7403950691223145,
	"reward_std": 0.19349028170108795,
	"rewards/rna_reward_fn/mean": 0.7403950691223145,
	"rewards/rna_reward_fn/std": 0.31960996985435486,
	"step": 211
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 140.09375,
	"completions/mean_terminated_length": 140.09375,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.128474622964859,
	"epoch": 2.4941176470588236,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4394446611404419,
	"learning_rate": 1.7254901960784313e-07,
	"loss": -0.0,
	"num_tokens": 32959328.0,
	"reward": 0.7468061447143555,
	"reward_std": 0.13857056200504303,
	"rewards/rna_reward_fn/mean": 0.7468062043190002,
	"rewards/rna_reward_fn/std": 0.2608503997325897,
	"step": 212
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 144.03125,
	"completions/mean_terminated_length": 144.03125,
	"completions/min_length": 34.0,
	"completions/min_terminated_length": 34.0,
	"entropy": 0.14114519208669662,
	"epoch": 2.5058823529411764,
	"frac_reward_zero_std": 0.03125,
	"grad_norm": 0.5121099352836609,
	"learning_rate": 1.6862745098039215e-07,
	"loss": 0.0,
	"num_tokens": 33107840.0,
	"reward": 0.6896160244941711,
	"reward_std": 0.17474885284900665,
	"rewards/rna_reward_fn/mean": 0.6896160244941711,
	"rewards/rna_reward_fn/std": 0.30136245489120483,
	"step": 213
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 196.625,
	"completions/mean_terminated_length": 196.625,
	"completions/min_length": 37.0,
	"completions/min_terminated_length": 37.0,
	"entropy": 0.1554037183523178,
	"epoch": 2.5176470588235293,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5231500864028931,
	"learning_rate": 1.6470588235294117e-07,
	"loss": 0.0,
	"num_tokens": 33310208.0,
	"reward": 0.7346584796905518,
	"reward_std": 0.20079070329666138,
	"rewards/rna_reward_fn/mean": 0.7346584796905518,
	"rewards/rna_reward_fn/std": 0.30361971259117126,
	"step": 214
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 138.9375,
	"completions/mean_terminated_length": 138.9375,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.12060126662254333,
	"epoch": 2.5294117647058822,
	"frac_reward_zero_std": 0.03125,
	"grad_norm": 0.45047426223754883,
	"learning_rate": 1.607843137254902e-07,
	"loss": 0.0,
	"num_tokens": 33453504.0,
	"reward": 0.768707275390625,
	"reward_std": 0.13694067299365997,
	"rewards/rna_reward_fn/mean": 0.7687073349952698,
	"rewards/rna_reward_fn/std": 0.27220436930656433,
	"step": 215
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 198.78125,
	"completions/mean_terminated_length": 198.78125,
	"completions/min_length": 23.0,
	"completions/min_terminated_length": 23.0,
	"entropy": 0.1575038880109787,
	"epoch": 2.541176470588235,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5329861044883728,
	"learning_rate": 1.5686274509803921e-07,
	"loss": -0.0,
	"num_tokens": 33658080.0,
	"reward": 0.7541199922561646,
	"reward_std": 0.15449070930480957,
	"rewards/rna_reward_fn/mean": 0.7541199922561646,
	"rewards/rna_reward_fn/std": 0.2656092345714569,
	"step": 216
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 412.0,
	"completions/max_terminated_length": 412.0,
	"completions/mean_length": 158.34375,
	"completions/mean_terminated_length": 158.34375,
	"completions/min_length": 36.0,
	"completions/min_terminated_length": 36.0,
	"entropy": 0.15501223504543304,
	"epoch": 2.552941176470588,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.45992547273635864,
	"learning_rate": 1.5294117647058823e-07,
	"loss": 0.0,
	"num_tokens": 33821248.0,
	"reward": 0.7572486400604248,
	"reward_std": 0.15161246061325073,
	"rewards/rna_reward_fn/mean": 0.7572486400604248,
	"rewards/rna_reward_fn/std": 0.29167696833610535,
	"step": 217
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 169.625,
	"completions/mean_terminated_length": 169.625,
	"completions/min_length": 31.0,
	"completions/min_terminated_length": 31.0,
	"entropy": 0.13358986377716064,
	"epoch": 2.564705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.8965858817100525,
	"learning_rate": 1.4901960784313725e-07,
	"loss": -0.0,
	"num_tokens": 33995968.0,
	"reward": 0.7292990684509277,
	"reward_std": 0.16865938901901245,
	"rewards/rna_reward_fn/mean": 0.7292990684509277,
	"rewards/rna_reward_fn/std": 0.30115416646003723,
	"step": 218
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 176.78125,
	"completions/mean_terminated_length": 176.78125,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.13434413820505142,
	"epoch": 2.576470588235294,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.582165002822876,
	"learning_rate": 1.4509803921568628e-07,
	"loss": -0.0,
	"num_tokens": 34178016.0,
	"reward": 0.6599196195602417,
	"reward_std": 0.196761354804039,
	"rewards/rna_reward_fn/mean": 0.6599196791648865,
	"rewards/rna_reward_fn/std": 0.33999550342559814,
	"step": 219
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 156.1875,
	"completions/mean_terminated_length": 156.1875,
	"completions/min_length": 35.0,
	"completions/min_terminated_length": 35.0,
	"entropy": 0.1357617899775505,
	"epoch": 2.588235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5189464688301086,
	"learning_rate": 1.411764705882353e-07,
	"loss": 0.0,
	"num_tokens": 34338976.0,
	"reward": 0.7549696564674377,
	"reward_std": 0.1326015144586563,
	"rewards/rna_reward_fn/mean": 0.7549696564674377,
	"rewards/rna_reward_fn/std": 0.2852962613105774,
	"step": 220
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 148.15625,
	"completions/mean_terminated_length": 148.15625,
	"completions/min_length": 24.0,
	"completions/min_terminated_length": 24.0,
	"entropy": 0.15427181124687195,
	"epoch": 2.6,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.536194920539856,
	"learning_rate": 1.3725490196078432e-07,
	"loss": 0.0,
	"num_tokens": 34491712.0,
	"reward": 0.7131255865097046,
	"reward_std": 0.14100758731365204,
	"rewards/rna_reward_fn/mean": 0.7131255865097046,
	"rewards/rna_reward_fn/std": 0.31784212589263916,
	"step": 221
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 380.0,
	"completions/max_terminated_length": 380.0,
	"completions/mean_length": 145.1875,
	"completions/mean_terminated_length": 145.1875,
	"completions/min_length": 31.0,
	"completions/min_terminated_length": 31.0,
	"entropy": 0.13709458708763123,
	"epoch": 2.611764705882353,
	"frac_reward_zero_std": 0.03125,
	"grad_norm": 0.5712235569953918,
	"learning_rate": 1.3333333333333334e-07,
	"loss": 0.0,
	"num_tokens": 34641408.0,
	"reward": 0.7191460132598877,
	"reward_std": 0.16943207383155823,
	"rewards/rna_reward_fn/mean": 0.7191460132598877,
	"rewards/rna_reward_fn/std": 0.3015574514865875,
	"step": 222
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 145.1875,
	"completions/mean_terminated_length": 145.1875,
	"completions/min_length": 18.0,
	"completions/min_terminated_length": 18.0,
	"entropy": 0.13566020876169205,
	"epoch": 2.623529411764706,
	"frac_reward_zero_std": 0.03125,
	"grad_norm": 0.4192090630531311,
	"learning_rate": 1.2941176470588236e-07,
	"loss": 0.0,
	"num_tokens": 34791104.0,
	"reward": 0.7555572986602783,
	"reward_std": 0.16786056756973267,
	"rewards/rna_reward_fn/mean": 0.7555572986602783,
	"rewards/rna_reward_fn/std": 0.2797638177871704,
	"step": 223
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 165.09375,
	"completions/mean_terminated_length": 165.09375,
	"completions/min_length": 41.0,
	"completions/min_terminated_length": 41.0,
	"entropy": 0.12663453072309494,
	"epoch": 2.635294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6057937145233154,
	"learning_rate": 1.2549019607843138e-07,
	"loss": -0.0,
	"num_tokens": 34961184.0,
	"reward": 0.6839346289634705,
	"reward_std": 0.19452279806137085,
	"rewards/rna_reward_fn/mean": 0.6839346289634705,
	"rewards/rna_reward_fn/std": 0.33146002888679504,
	"step": 224
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 167.65625,
	"completions/mean_terminated_length": 167.65625,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.1426771581172943,
	"epoch": 2.6470588235294117,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4763612747192383,
	"learning_rate": 1.215686274509804e-07,
	"loss": 0.0,
	"num_tokens": 35133888.0,
	"reward": 0.6619032621383667,
	"reward_std": 0.17893120646476746,
	"rewards/rna_reward_fn/mean": 0.6619032621383667,
	"rewards/rna_reward_fn/std": 0.3283209800720215,
	"step": 225
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 399.0,
	"completions/max_terminated_length": 399.0,
	"completions/mean_length": 149.9375,
	"completions/mean_terminated_length": 149.9375,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.14778528362512589,
	"epoch": 2.6588235294117646,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4169410169124603,
	"learning_rate": 1.176470588235294e-07,
	"loss": -0.0,
	"num_tokens": 35288448.0,
	"reward": 0.6732456088066101,
	"reward_std": 0.16452832520008087,
	"rewards/rna_reward_fn/mean": 0.6732455492019653,
	"rewards/rna_reward_fn/std": 0.3249601721763611,
	"step": 226
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 145.09375,
	"completions/mean_terminated_length": 145.09375,
	"completions/min_length": 30.0,
	"completions/min_terminated_length": 30.0,
	"entropy": 0.1449032723903656,
	"epoch": 2.6705882352941175,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6590065360069275,
	"learning_rate": 1.1372549019607842e-07,
	"loss": -0.0,
	"num_tokens": 35438048.0,
	"reward": 0.7874460220336914,
	"reward_std": 0.12049897015094757,
	"rewards/rna_reward_fn/mean": 0.7874460220336914,
	"rewards/rna_reward_fn/std": 0.2661431133747101,
	"step": 227
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 151.75,
	"completions/mean_terminated_length": 151.75,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.13789667189121246,
	"epoch": 2.682352941176471,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.501124918460846,
	"learning_rate": 1.0980392156862744e-07,
	"loss": -0.0,
	"num_tokens": 35594464.0,
	"reward": 0.76551353931427,
	"reward_std": 0.14058314263820648,
	"rewards/rna_reward_fn/mean": 0.7655135989189148,
	"rewards/rna_reward_fn/std": 0.2855876088142395,
	"step": 228
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 163.71875,
	"completions/mean_terminated_length": 163.71875,
	"completions/min_length": 15.0,
	"completions/min_terminated_length": 15.0,
	"entropy": 0.14094559848308563,
	"epoch": 2.6941176470588237,
	"frac_reward_zero_std": 0.03125,
	"grad_norm": 0.736441433429718,
	"learning_rate": 1.0588235294117647e-07,
	"loss": 0.0,
	"num_tokens": 35763136.0,
	"reward": 0.6939565539360046,
	"reward_std": 0.16584208607673645,
	"rewards/rna_reward_fn/mean": 0.6939565539360046,
	"rewards/rna_reward_fn/std": 0.32086971402168274,
	"step": 229
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 139.78125,
	"completions/mean_terminated_length": 139.78125,
	"completions/min_length": 16.0,
	"completions/min_terminated_length": 16.0,
	"entropy": 0.13419293239712715,
	"epoch": 2.7058823529411766,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6264002919197083,
	"learning_rate": 1.0196078431372549e-07,
	"loss": -0.0,
	"num_tokens": 35907296.0,
	"reward": 0.7488532066345215,
	"reward_std": 0.1620199978351593,
	"rewards/rna_reward_fn/mean": 0.7488532066345215,
	"rewards/rna_reward_fn/std": 0.2980068624019623,
	"step": 230
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 397.0,
	"completions/max_terminated_length": 397.0,
	"completions/mean_length": 137.40625,
	"completions/mean_terminated_length": 137.40625,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.13055864721536636,
	"epoch": 2.7176470588235295,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4814888536930084,
	"learning_rate": 9.80392156862745e-08,
	"loss": 0.0,
	"num_tokens": 36049024.0,
	"reward": 0.6655980348587036,
	"reward_std": 0.15648490190505981,
	"rewards/rna_reward_fn/mean": 0.6655980348587036,
	"rewards/rna_reward_fn/std": 0.35470837354660034,
	"step": 231
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 130.90625,
	"completions/mean_terminated_length": 130.90625,
	"completions/min_length": 14.0,
	"completions/min_terminated_length": 14.0,
	"entropy": 0.12380100041627884,
	"epoch": 2.7294117647058824,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.583757221698761,
	"learning_rate": 9.411764705882353e-08,
	"loss": -0.0,
	"num_tokens": 36184096.0,
	"reward": 0.7524540424346924,
	"reward_std": 0.15423446893692017,
	"rewards/rna_reward_fn/mean": 0.7524540424346924,
	"rewards/rna_reward_fn/std": 0.28454405069351196,
	"step": 232
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 399.0,
	"completions/max_terminated_length": 399.0,
	"completions/mean_length": 145.34375,
	"completions/mean_terminated_length": 145.34375,
	"completions/min_length": 27.0,
	"completions/min_terminated_length": 27.0,
	"entropy": 0.1325184628367424,
	"epoch": 2.7411764705882353,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4390006959438324,
	"learning_rate": 9.019607843137255e-08,
	"loss": -0.0,
	"num_tokens": 36333952.0,
	"reward": 0.7277975082397461,
	"reward_std": 0.19573622941970825,
	"rewards/rna_reward_fn/mean": 0.7277975082397461,
	"rewards/rna_reward_fn/std": 0.32145431637763977,
	"step": 233
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 168.125,
	"completions/mean_terminated_length": 168.125,
	"completions/min_length": 26.0,
	"completions/min_terminated_length": 26.0,
	"entropy": 0.13657083362340927,
	"epoch": 2.7529411764705882,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7681740522384644,
	"learning_rate": 8.627450980392157e-08,
	"loss": -0.0,
	"num_tokens": 36507136.0,
	"reward": 0.7168524265289307,
	"reward_std": 0.18613344430923462,
	"rewards/rna_reward_fn/mean": 0.7168524265289307,
	"rewards/rna_reward_fn/std": 0.3243979215621948,
	"step": 234
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 163.875,
	"completions/mean_terminated_length": 163.875,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.14333349466323853,
	"epoch": 2.764705882352941,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5657479763031006,
	"learning_rate": 8.235294117647059e-08,
	"loss": 0.0,
	"num_tokens": 36675968.0,
	"reward": 0.725771427154541,
	"reward_std": 0.16519448161125183,
	"rewards/rna_reward_fn/mean": 0.725771427154541,
	"rewards/rna_reward_fn/std": 0.29766252636909485,
	"step": 235
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 418.0,
	"completions/max_terminated_length": 418.0,
	"completions/mean_length": 156.46875,
	"completions/mean_terminated_length": 156.46875,
	"completions/min_length": 21.0,
	"completions/min_terminated_length": 21.0,
	"entropy": 0.1441263109445572,
	"epoch": 2.776470588235294,
	"frac_reward_zero_std": 0.03125,
	"grad_norm": 0.4572143256664276,
	"learning_rate": 7.843137254901961e-08,
	"loss": 0.0,
	"num_tokens": 36837216.0,
	"reward": 0.742597222328186,
	"reward_std": 0.16114118695259094,
	"rewards/rna_reward_fn/mean": 0.742597222328186,
	"rewards/rna_reward_fn/std": 0.29970842599868774,
	"step": 236
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 396.0,
	"completions/max_terminated_length": 396.0,
	"completions/mean_length": 158.1875,
	"completions/mean_terminated_length": 158.1875,
	"completions/min_length": 39.0,
	"completions/min_terminated_length": 39.0,
	"entropy": 0.1409977823495865,
	"epoch": 2.788235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.42590776085853577,
	"learning_rate": 7.450980392156863e-08,
	"loss": -0.0,
	"num_tokens": 37000224.0,
	"reward": 0.7145720720291138,
	"reward_std": 0.164639413356781,
	"rewards/rna_reward_fn/mean": 0.7145720720291138,
	"rewards/rna_reward_fn/std": 0.3098330497741699,
	"step": 237
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 276.0,
	"completions/max_terminated_length": 276.0,
	"completions/mean_length": 107.21875,
	"completions/mean_terminated_length": 107.21875,
	"completions/min_length": 33.0,
	"completions/min_terminated_length": 33.0,
	"entropy": 0.11754556372761726,
	"epoch": 2.8,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4764781892299652,
	"learning_rate": 7.058823529411765e-08,
	"loss": 0.0,
	"num_tokens": 37111040.0,
	"reward": 0.7425558567047119,
	"reward_std": 0.16547845304012299,
	"rewards/rna_reward_fn/mean": 0.7425558567047119,
	"rewards/rna_reward_fn/std": 0.3051395118236542,
	"step": 238
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 492.0,
	"completions/max_terminated_length": 492.0,
	"completions/mean_length": 172.84375,
	"completions/mean_terminated_length": 172.84375,
	"completions/min_length": 42.0,
	"completions/min_terminated_length": 42.0,
	"entropy": 0.14019257575273514,
	"epoch": 2.8117647058823527,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5157439708709717,
	"learning_rate": 6.666666666666667e-08,
	"loss": -0.0,
	"num_tokens": 37289056.0,
	"reward": 0.6816315650939941,
	"reward_std": 0.2366928905248642,
	"rewards/rna_reward_fn/mean": 0.6816315650939941,
	"rewards/rna_reward_fn/std": 0.326466828584671,
	"step": 239
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 164.15625,
	"completions/mean_terminated_length": 164.15625,
	"completions/min_length": 37.0,
	"completions/min_terminated_length": 37.0,
	"entropy": 0.1466379389166832,
	"epoch": 2.8235294117647056,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5139991044998169,
	"learning_rate": 6.274509803921569e-08,
	"loss": 0.0,
	"num_tokens": 37458176.0,
	"reward": 0.7532614469528198,
	"reward_std": 0.1603999137878418,
	"rewards/rna_reward_fn/mean": 0.7532614469528198,
	"rewards/rna_reward_fn/std": 0.31244710087776184,
	"step": 240
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 157.15625,
	"completions/mean_terminated_length": 157.15625,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.12356984615325928,
	"epoch": 2.835294117647059,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.9720450043678284,
	"learning_rate": 5.88235294117647e-08,
	"loss": -0.0,
	"num_tokens": 37620128.0,
	"reward": 0.7346148490905762,
	"reward_std": 0.15429024398326874,
	"rewards/rna_reward_fn/mean": 0.7346148490905762,
	"rewards/rna_reward_fn/std": 0.31154975295066833,
	"step": 241
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 153.5,
	"completions/mean_terminated_length": 153.5,
	"completions/min_length": 17.0,
	"completions/min_terminated_length": 17.0,
	"entropy": 0.1341606229543686,
	"epoch": 2.847058823529412,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5591171979904175,
	"learning_rate": 5.490196078431372e-08,
	"loss": -0.0,
	"num_tokens": 37778336.0,
	"reward": 0.7116289138793945,
	"reward_std": 0.21866443753242493,
	"rewards/rna_reward_fn/mean": 0.7116289138793945,
	"rewards/rna_reward_fn/std": 0.2980954051017761,
	"step": 242
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 482.0,
	"completions/max_terminated_length": 482.0,
	"completions/mean_length": 203.4375,
	"completions/mean_terminated_length": 203.4375,
	"completions/min_length": 40.0,
	"completions/min_terminated_length": 40.0,
	"entropy": 0.14845673739910126,
	"epoch": 2.8588235294117648,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5372319221496582,
	"learning_rate": 5.098039215686274e-08,
	"loss": 0.0,
	"num_tokens": 37987680.0,
	"reward": 0.7392944693565369,
	"reward_std": 0.19700977206230164,
	"rewards/rna_reward_fn/mean": 0.7392945289611816,
	"rewards/rna_reward_fn/std": 0.30940258502960205,
	"step": 243
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 139.6875,
	"completions/mean_terminated_length": 139.6875,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.13047143816947937,
	"epoch": 2.8705882352941177,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5987316370010376,
	"learning_rate": 4.705882352941176e-08,
	"loss": -0.0,
	"num_tokens": 38131744.0,
	"reward": 0.6977779269218445,
	"reward_std": 0.2151854932308197,
	"rewards/rna_reward_fn/mean": 0.6977779269218445,
	"rewards/rna_reward_fn/std": 0.3459690511226654,
	"step": 244
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 490.0,
	"completions/max_terminated_length": 490.0,
	"completions/mean_length": 148.40625,
	"completions/mean_terminated_length": 148.40625,
	"completions/min_length": 22.0,
	"completions/min_terminated_length": 22.0,
	"entropy": 0.14810562878847122,
	"epoch": 2.8823529411764706,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.7430775165557861,
	"learning_rate": 4.313725490196078e-08,
	"loss": -0.0,
	"num_tokens": 38284736.0,
	"reward": 0.6900802254676819,
	"reward_std": 0.18723735213279724,
	"rewards/rna_reward_fn/mean": 0.6900802254676819,
	"rewards/rna_reward_fn/std": 0.3328934609889984,
	"step": 245
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 139.5625,
	"completions/mean_terminated_length": 139.5625,
	"completions/min_length": 30.0,
	"completions/min_terminated_length": 30.0,
	"entropy": 0.12182106822729111,
	"epoch": 2.8941176470588235,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.49635204672813416,
	"learning_rate": 3.9215686274509804e-08,
	"loss": 0.0,
	"num_tokens": 38428672.0,
	"reward": 0.7072439193725586,
	"reward_std": 0.1840672791004181,
	"rewards/rna_reward_fn/mean": 0.7072439193725586,
	"rewards/rna_reward_fn/std": 0.3065541088581085,
	"step": 246
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 167.6875,
	"completions/mean_terminated_length": 167.6875,
	"completions/min_length": 36.0,
	"completions/min_terminated_length": 36.0,
	"entropy": 0.13815301656723022,
	"epoch": 2.9058823529411764,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.8550586104393005,
	"learning_rate": 3.5294117647058824e-08,
	"loss": -0.0,
	"num_tokens": 38601408.0,
	"reward": 0.7532185316085815,
	"reward_std": 0.1475568264722824,
	"rewards/rna_reward_fn/mean": 0.7532185316085815,
	"rewards/rna_reward_fn/std": 0.29489991068840027,
	"step": 247
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 340.0,
	"completions/max_terminated_length": 340.0,
	"completions/mean_length": 122.34375,
	"completions/mean_terminated_length": 122.34375,
	"completions/min_length": 29.0,
	"completions/min_terminated_length": 29.0,
	"entropy": 0.12259503453969955,
	"epoch": 2.9176470588235293,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.44689512252807617,
	"learning_rate": 3.1372549019607844e-08,
	"loss": 0.0,
	"num_tokens": 38727712.0,
	"reward": 0.7440149784088135,
	"reward_std": 0.1674138307571411,
	"rewards/rna_reward_fn/mean": 0.7440149188041687,
	"rewards/rna_reward_fn/std": 0.3040436804294586,
	"step": 248
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 474.0,
	"completions/max_terminated_length": 474.0,
	"completions/mean_length": 192.0,
	"completions/mean_terminated_length": 192.0,
	"completions/min_length": 42.0,
	"completions/min_terminated_length": 42.0,
	"entropy": 0.1282111555337906,
	"epoch": 2.9294117647058826,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5679563879966736,
	"learning_rate": 2.745098039215686e-08,
	"loss": 0.0,
	"num_tokens": 38925344.0,
	"reward": 0.6850175857543945,
	"reward_std": 0.19530020654201508,
	"rewards/rna_reward_fn/mean": 0.6850175857543945,
	"rewards/rna_reward_fn/std": 0.33921393752098083,
	"step": 249
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 117.09375,
	"completions/mean_terminated_length": 117.09375,
	"completions/min_length": 17.0,
	"completions/min_terminated_length": 17.0,
	"entropy": 0.12855321913957596,
	"epoch": 2.9411764705882355,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.505153238773346,
	"learning_rate": 2.352941176470588e-08,
	"loss": -0.0,
	"num_tokens": 39046272.0,
	"reward": 0.6269246339797974,
	"reward_std": 0.16829745471477509,
	"rewards/rna_reward_fn/mean": 0.6269246339797974,
	"rewards/rna_reward_fn/std": 0.33109787106513977,
	"step": 250
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 424.0,
	"completions/max_terminated_length": 424.0,
	"completions/mean_length": 121.0,
	"completions/mean_terminated_length": 121.0,
	"completions/min_length": 18.0,
	"completions/min_terminated_length": 18.0,
	"entropy": 0.12059168517589569,
	"epoch": 2.9529411764705884,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.4366406500339508,
	"learning_rate": 1.9607843137254902e-08,
	"loss": 0.0,
	"num_tokens": 39171200.0,
	"reward": 0.7053718566894531,
	"reward_std": 0.14770260453224182,
	"rewards/rna_reward_fn/mean": 0.7053717970848083,
	"rewards/rna_reward_fn/std": 0.3234374523162842,
	"step": 251
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 327.0,
	"completions/max_terminated_length": 327.0,
	"completions/mean_length": 132.1875,
	"completions/mean_terminated_length": 132.1875,
	"completions/min_length": 26.0,
	"completions/min_terminated_length": 26.0,
	"entropy": 0.13018939644098282,
	"epoch": 2.9647058823529413,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6731492280960083,
	"learning_rate": 1.5686274509803922e-08,
	"loss": 0.0,
	"num_tokens": 39307584.0,
	"reward": 0.7679715752601624,
	"reward_std": 0.17536047101020813,
	"rewards/rna_reward_fn/mean": 0.7679715156555176,
	"rewards/rna_reward_fn/std": 0.2801183760166168,
	"step": 252
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 145.40625,
	"completions/mean_terminated_length": 145.40625,
	"completions/min_length": 37.0,
	"completions/min_terminated_length": 37.0,
	"entropy": 0.10920717194676399,
	"epoch": 2.976470588235294,
	"frac_reward_zero_std": 0.03125,
	"grad_norm": 0.46245628595352173,
	"learning_rate": 1.176470588235294e-08,
	"loss": 0.0,
	"num_tokens": 39457504.0,
	"reward": 0.7559751272201538,
	"reward_std": 0.15144692361354828,
	"rewards/rna_reward_fn/mean": 0.7559751272201538,
	"rewards/rna_reward_fn/std": 0.3152746260166168,
	"step": 253
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 193.3125,
	"completions/mean_terminated_length": 193.3125,
	"completions/min_length": 25.0,
	"completions/min_terminated_length": 25.0,
	"entropy": 0.15460387617349625,
	"epoch": 2.988235294117647,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.6124170422554016,
	"learning_rate": 7.843137254901961e-09,
	"loss": 0.0,
	"num_tokens": 39656480.0,
	"reward": 0.7068374752998352,
	"reward_std": 0.19490104913711548,
	"rewards/rna_reward_fn/mean": 0.7068374752998352,
	"rewards/rna_reward_fn/std": 0.310377836227417,
	"step": 254
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 401.0,
	"completions/max_terminated_length": 401.0,
	"completions/mean_length": 149.5,
	"completions/mean_terminated_length": 149.5,
	"completions/min_length": 36.0,
	"completions/min_terminated_length": 36.0,
	"entropy": 0.1327020823955536,
	"epoch": 3.0,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 0.5195903778076172,
	"learning_rate": 3.9215686274509805e-09,
	"loss": -0.0,
	"num_tokens": 39810592.0,
	"reward": 0.7493961453437805,
	"reward_std": 0.17497789859771729,
	"rewards/rna_reward_fn/mean": 0.7493961453437805,
	"rewards/rna_reward_fn/std": 0.31194695830345154,
	"step": 255
	}
	],
	"logging_steps": 1.0,
	"max_steps": 255,
	"num_input_tokens_seen": 39810592,
	"num_train_epochs": 3,
	"save_steps": 100,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 256,
	"trial_name": null,
	"trial_params": null
	}