| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 125.0, | |
| "eval_steps": 50, | |
| "global_step": 250, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.203125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 565.0, | |
| "completions/mean_length": 382.40625, | |
| "completions/mean_terminated_length": 218.86274509803923, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 0.5, | |
| "grad_norm": 7.220178667921573e-05, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0008, | |
| "num_tokens": 44698.0, | |
| "reward": 0.616805911064148, | |
| "reward_std": 0.03843851387500763, | |
| "rewards/reward_matching": 0.5590299367904663, | |
| "rewards/reward_object_count": 0.40693962574005127, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 919.0, | |
| "completions/mean_length": 284.40625, | |
| "completions/mean_terminated_length": 207.89655172413794, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 1.0, | |
| "grad_norm": 9.472812234889716e-05, | |
| "kl": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 84084.0, | |
| "reward": 0.5493605732917786, | |
| "reward_std": 0.05996648967266083, | |
| "rewards/reward_matching": 0.4322524964809418, | |
| "rewards/reward_object_count": 0.4500454068183899, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 693.0, | |
| "completions/mean_length": 319.875, | |
| "completions/mean_terminated_length": 157.3846153846154, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 1.5, | |
| "grad_norm": 9.924145706463605e-05, | |
| "kl": 0.00042467157436476555, | |
| "learning_rate": 2e-06, | |
| "loss": 0.0015, | |
| "num_tokens": 123180.0, | |
| "reward": 0.5689160823822021, | |
| "reward_std": 0.04476189613342285, | |
| "rewards/reward_matching": 0.47214236855506897, | |
| "rewards/reward_object_count": 0.4281533360481262, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 834.0, | |
| "completions/mean_length": 308.015625, | |
| "completions/mean_terminated_length": 142.78846153846155, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 2.0, | |
| "grad_norm": 5.0325375923421234e-05, | |
| "kl": 0.0004158883857598994, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 165037.0, | |
| "reward": 0.6509107351303101, | |
| "reward_std": 0.03968992456793785, | |
| "rewards/reward_matching": 0.5518875122070312, | |
| "rewards/reward_object_count": 0.5988913774490356, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 997.0, | |
| "completions/mean_length": 269.28125, | |
| "completions/mean_terminated_length": 244.93548387096774, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 2.5, | |
| "grad_norm": 0.00011539691331563517, | |
| "kl": 0.0006114643038017675, | |
| "learning_rate": 4e-06, | |
| "loss": 0.0016, | |
| "num_tokens": 201535.0, | |
| "reward": 0.6278109550476074, | |
| "reward_std": 0.05065031349658966, | |
| "rewards/reward_matching": 0.5660988092422485, | |
| "rewards/reward_object_count": 0.44075822830200195, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 919.0, | |
| "completions/mean_length": 353.546875, | |
| "completions/mean_terminated_length": 229.38888888888889, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "epoch": 3.0, | |
| "grad_norm": 0.0003470871888566762, | |
| "kl": 0.00038929956645006314, | |
| "learning_rate": 4.9999999999999996e-06, | |
| "loss": -0.0011, | |
| "num_tokens": 241506.0, | |
| "reward": 0.5265649557113647, | |
| "reward_std": 0.08980950713157654, | |
| "rewards/reward_matching": 0.4059111475944519, | |
| "rewards/reward_object_count": 0.4463413953781128, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.296875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 931.0, | |
| "completions/mean_length": 455.703125, | |
| "completions/mean_terminated_length": 215.75555555555556, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 3.5, | |
| "grad_norm": 0.00015567304217256606, | |
| "kl": 0.0004744630350614898, | |
| "learning_rate": 6e-06, | |
| "loss": 0.0018, | |
| "num_tokens": 290575.0, | |
| "reward": 0.5346964001655579, | |
| "reward_std": 0.08377400040626526, | |
| "rewards/reward_matching": 0.43381091952323914, | |
| "rewards/reward_object_count": 0.38767415285110474, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 890.0, | |
| "completions/mean_length": 494.71875, | |
| "completions/mean_terminated_length": 254.13636363636363, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 4.0, | |
| "grad_norm": 9.709381993161514e-05, | |
| "kl": 0.0003735233622137457, | |
| "learning_rate": 7e-06, | |
| "loss": 0.0015, | |
| "num_tokens": 343741.0, | |
| "reward": 0.5899163484573364, | |
| "reward_std": 0.03980647027492523, | |
| "rewards/reward_matching": 0.5186960697174072, | |
| "rewards/reward_object_count": 0.3934932053089142, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 301.0, | |
| "completions/max_terminated_length": 301.0, | |
| "completions/mean_length": 78.6875, | |
| "completions/mean_terminated_length": 78.6875, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "epoch": 4.5, | |
| "grad_norm": 0.00030578242149204016, | |
| "kl": 0.0012195941890240647, | |
| "learning_rate": 8e-06, | |
| "loss": -0.001, | |
| "num_tokens": 370601.0, | |
| "reward": 0.6947240829467773, | |
| "reward_std": 0.09824053198099136, | |
| "rewards/reward_matching": 0.5668145418167114, | |
| "rewards/reward_object_count": 0.8044270873069763, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.109375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 988.0, | |
| "completions/mean_length": 223.171875, | |
| "completions/mean_terminated_length": 124.82456140350877, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "epoch": 5.0, | |
| "grad_norm": 0.00012970698298886418, | |
| "kl": 0.0004933492928103078, | |
| "learning_rate": 9e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 406388.0, | |
| "reward": 0.6668163537979126, | |
| "reward_std": 0.056093666702508926, | |
| "rewards/reward_matching": 0.5769085884094238, | |
| "rewards/reward_object_count": 0.6189813613891602, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 271.0, | |
| "completions/mean_length": 123.21875, | |
| "completions/mean_terminated_length": 78.91803278688525, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 5.5, | |
| "grad_norm": 0.00010655791993485764, | |
| "kl": 0.0008107178764475975, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 435458.0, | |
| "reward": 0.56708824634552, | |
| "reward_std": 0.10232001543045044, | |
| "rewards/reward_matching": 0.419674813747406, | |
| "rewards/reward_object_count": 0.5920416116714478, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 505.0, | |
| "completions/mean_length": 278.671875, | |
| "completions/mean_terminated_length": 140.64814814814815, | |
| "completions/min_length": 32.0, | |
| "completions/min_terminated_length": 32.0, | |
| "epoch": 6.0, | |
| "grad_norm": 6.27185363555327e-05, | |
| "kl": 0.000603774591581896, | |
| "learning_rate": 1.1e-05, | |
| "loss": 0.0013, | |
| "num_tokens": 474797.0, | |
| "reward": 0.6307092308998108, | |
| "reward_std": 0.042993463575839996, | |
| "rewards/reward_matching": 0.5698345899581909, | |
| "rewards/reward_object_count": 0.4440425634384155, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 896.0, | |
| "completions/mean_length": 519.484375, | |
| "completions/mean_terminated_length": 290.15909090909093, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 6.5, | |
| "grad_norm": 0.0002488858881406486, | |
| "kl": 0.00022653781888948288, | |
| "learning_rate": 1.2e-05, | |
| "loss": -0.0021, | |
| "num_tokens": 529548.0, | |
| "reward": 0.5722981691360474, | |
| "reward_std": 0.0947316363453865, | |
| "rewards/reward_matching": 0.5071631073951721, | |
| "rewards/reward_object_count": 0.3868764042854309, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 436.0, | |
| "completions/mean_length": 136.0625, | |
| "completions/mean_terminated_length": 107.41935483870968, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 7.0, | |
| "grad_norm": 4.9686714191921055e-05, | |
| "kl": 0.0007801693172950763, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 556880.0, | |
| "reward": 0.6056115031242371, | |
| "reward_std": 0.05535212904214859, | |
| "rewards/reward_matching": 0.5002501010894775, | |
| "rewards/reward_object_count": 0.5273073315620422, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 865.0, | |
| "completions/mean_length": 477.109375, | |
| "completions/mean_terminated_length": 228.52272727272728, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 7.5, | |
| "grad_norm": 0.00013081179349683225, | |
| "kl": 0.0002415532035229262, | |
| "learning_rate": 1.4e-05, | |
| "loss": 0.0011, | |
| "num_tokens": 602839.0, | |
| "reward": 0.5026674270629883, | |
| "reward_std": 0.07394808530807495, | |
| "rewards/reward_matching": 0.38987797498703003, | |
| "rewards/reward_object_count": 0.359328031539917, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 238.0, | |
| "completions/max_terminated_length": 238.0, | |
| "completions/mean_length": 86.21875, | |
| "completions/mean_terminated_length": 86.21875, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 8.0, | |
| "grad_norm": 0.00016548681014683098, | |
| "kl": 0.0009981263428926468, | |
| "learning_rate": 1.5e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 629221.0, | |
| "reward": 0.6400452852249146, | |
| "reward_std": 0.07901425659656525, | |
| "rewards/reward_matching": 0.49512743949890137, | |
| "rewards/reward_object_count": 0.73046875, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.171875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 970.0, | |
| "completions/mean_length": 356.625, | |
| "completions/mean_terminated_length": 218.11320754716982, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 8.5, | |
| "grad_norm": 0.0001052798397722654, | |
| "kl": 0.000348803438100731, | |
| "learning_rate": 1.6e-05, | |
| "loss": 0.0017, | |
| "num_tokens": 670349.0, | |
| "reward": 0.5609536170959473, | |
| "reward_std": 0.05881837010383606, | |
| "rewards/reward_matching": 0.4391196370124817, | |
| "rewards/reward_object_count": 0.48740923404693604, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 238.0, | |
| "completions/max_terminated_length": 238.0, | |
| "completions/mean_length": 104.28125, | |
| "completions/mean_terminated_length": 104.28125, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 9.0, | |
| "grad_norm": 0.0001435808662790805, | |
| "kl": 0.0010840660106623545, | |
| "learning_rate": 1.7e-05, | |
| "loss": -0.0006, | |
| "num_tokens": 696927.0, | |
| "reward": 0.6028817892074585, | |
| "reward_std": 0.08744296431541443, | |
| "rewards/reward_matching": 0.5085169672966003, | |
| "rewards/reward_object_count": 0.5044828653335571, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.203125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 634.0, | |
| "completions/mean_length": 341.03125, | |
| "completions/mean_terminated_length": 166.94117647058823, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 9.5, | |
| "grad_norm": 7.831490802345797e-05, | |
| "kl": 0.0003636158817243995, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.0016, | |
| "num_tokens": 736417.0, | |
| "reward": 0.5801401138305664, | |
| "reward_std": 0.04980730637907982, | |
| "rewards/reward_matching": 0.4626754820346832, | |
| "rewards/reward_object_count": 0.5126744508743286, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 667.0, | |
| "completions/mean_length": 283.40625, | |
| "completions/mean_terminated_length": 112.5, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 10.0, | |
| "grad_norm": 5.31727600900922e-05, | |
| "kl": 0.0005135804531164467, | |
| "learning_rate": 1.9e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 773499.0, | |
| "reward": 0.6203708648681641, | |
| "reward_std": 0.0496257059276104, | |
| "rewards/reward_matching": 0.4899587631225586, | |
| "rewards/reward_object_count": 0.631977915763855, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.109375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1006.0, | |
| "completions/mean_length": 231.671875, | |
| "completions/mean_terminated_length": 134.3684210526316, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 10.5, | |
| "grad_norm": 5.5479034926975146e-05, | |
| "kl": 0.000991112490737578, | |
| "learning_rate": 1.9999999999999998e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 808230.0, | |
| "reward": 0.6594911217689514, | |
| "reward_std": 0.044610194861888885, | |
| "rewards/reward_matching": 0.543105959892273, | |
| "rewards/reward_object_count": 0.668137788772583, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 855.0, | |
| "completions/mean_length": 272.296875, | |
| "completions/mean_terminated_length": 164.91071428571428, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 11.0, | |
| "grad_norm": 8.819045615382493e-05, | |
| "kl": 0.0005528706569748465, | |
| "learning_rate": 2.1e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 843961.0, | |
| "reward": 0.5963397026062012, | |
| "reward_std": 0.07382210344076157, | |
| "rewards/reward_matching": 0.45430704951286316, | |
| "rewards/reward_object_count": 0.6344020962715149, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 600.0, | |
| "completions/mean_length": 264.21875, | |
| "completions/mean_terminated_length": 185.6206896551724, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 11.5, | |
| "grad_norm": 0.0001272808003704995, | |
| "kl": 0.0010251196290482767, | |
| "learning_rate": 2.2e-05, | |
| "loss": 0.0015, | |
| "num_tokens": 878215.0, | |
| "reward": 0.5910627841949463, | |
| "reward_std": 0.07647538185119629, | |
| "rewards/reward_matching": 0.4740520715713501, | |
| "rewards/reward_object_count": 0.5331577062606812, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.234375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1001.0, | |
| "completions/mean_length": 437.671875, | |
| "completions/mean_terminated_length": 258.18367346938777, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 12.0, | |
| "grad_norm": 0.00030737402266822755, | |
| "kl": 0.0005441164066724014, | |
| "learning_rate": 2.3000000000000003e-05, | |
| "loss": -0.0008, | |
| "num_tokens": 928690.0, | |
| "reward": 0.4855830669403076, | |
| "reward_std": 0.11943839490413666, | |
| "rewards/reward_matching": 0.4223440885543823, | |
| "rewards/reward_object_count": 0.20775802433490753, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 919.0, | |
| "completions/mean_length": 425.546875, | |
| "completions/mean_terminated_length": 287.4423076923077, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 12.5, | |
| "grad_norm": 0.0001554730988573283, | |
| "kl": 0.0006814353218942415, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.0031, | |
| "num_tokens": 973589.0, | |
| "reward": 0.5343494415283203, | |
| "reward_std": 0.07961155474185944, | |
| "rewards/reward_matching": 0.44471532106399536, | |
| "rewards/reward_object_count": 0.3532262444496155, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 957.0, | |
| "completions/mean_length": 217.65625, | |
| "completions/mean_terminated_length": 134.24137931034483, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 13.0, | |
| "grad_norm": 0.0002844391274265945, | |
| "kl": 0.0010056742103188299, | |
| "learning_rate": 2.5e-05, | |
| "loss": -0.0027, | |
| "num_tokens": 1005823.0, | |
| "reward": 0.5949710011482239, | |
| "reward_std": 0.09501722455024719, | |
| "rewards/reward_matching": 0.4553234577178955, | |
| "rewards/reward_object_count": 0.6557595729827881, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 436.0, | |
| "completions/mean_length": 212.03125, | |
| "completions/mean_terminated_length": 128.0344827586207, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 13.5, | |
| "grad_norm": 7.75867301854305e-05, | |
| "kl": 0.0008590275319875218, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 0.0016, | |
| "num_tokens": 1037057.0, | |
| "reward": 0.5786451101303101, | |
| "reward_std": 0.05433168634772301, | |
| "rewards/reward_matching": 0.453294575214386, | |
| "rewards/reward_object_count": 0.5333421230316162, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.171875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 766.0, | |
| "completions/mean_length": 299.171875, | |
| "completions/mean_terminated_length": 148.73584905660377, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 14.0, | |
| "grad_norm": 0.00023781158961355686, | |
| "kl": 0.00094783199892845, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 1078668.0, | |
| "reward": 0.5741347074508667, | |
| "reward_std": 0.08269049972295761, | |
| "rewards/reward_matching": 0.5097981691360474, | |
| "rewards/reward_object_count": 0.37252914905548096, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.171875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 972.0, | |
| "completions/mean_length": 259.09375, | |
| "completions/mean_terminated_length": 100.33962264150944, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 14.5, | |
| "grad_norm": 6.763833516743034e-05, | |
| "kl": 0.0008891169927665032, | |
| "learning_rate": 2.8e-05, | |
| "loss": -0.0, | |
| "num_tokens": 1117394.0, | |
| "reward": 0.6306777000427246, | |
| "reward_std": 0.04761374741792679, | |
| "rewards/reward_matching": 0.5269917249679565, | |
| "rewards/reward_object_count": 0.5724132061004639, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 524.0, | |
| "completions/mean_length": 183.71875, | |
| "completions/mean_terminated_length": 170.38095238095238, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 15.0, | |
| "grad_norm": 6.627541006309912e-05, | |
| "kl": 0.0015845489542698488, | |
| "learning_rate": 2.9e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 1149696.0, | |
| "reward": 0.6976220011711121, | |
| "reward_std": 0.04097198694944382, | |
| "rewards/reward_matching": 0.6446974277496338, | |
| "rewards/reward_object_count": 0.5540179014205933, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 769.0, | |
| "completions/mean_length": 240.78125, | |
| "completions/mean_terminated_length": 202.2622950819672, | |
| "completions/min_length": 70.0, | |
| "completions/min_terminated_length": 70.0, | |
| "epoch": 15.5, | |
| "grad_norm": 0.00014589792408514768, | |
| "kl": 0.0013277196849230677, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0009, | |
| "num_tokens": 1185330.0, | |
| "reward": 0.5496550798416138, | |
| "reward_std": 0.06000591441988945, | |
| "rewards/reward_matching": 0.4662263095378876, | |
| "rewards/reward_object_count": 0.3495963215827942, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 273.0, | |
| "completions/mean_length": 96.34375, | |
| "completions/mean_terminated_length": 81.61904761904762, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 16.0, | |
| "grad_norm": 5.5187963880598545e-05, | |
| "kl": 0.004379737103590742, | |
| "learning_rate": 2.999992132854894e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 1209160.0, | |
| "reward": 0.6008960604667664, | |
| "reward_std": 0.05777881667017937, | |
| "rewards/reward_matching": 0.43078088760375977, | |
| "rewards/reward_object_count": 0.7121376991271973, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 387.0, | |
| "completions/max_terminated_length": 387.0, | |
| "completions/mean_length": 117.546875, | |
| "completions/mean_terminated_length": 117.546875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 16.5, | |
| "grad_norm": 6.276514614000916e-05, | |
| "kl": 0.003105600946582854, | |
| "learning_rate": 2.999968531502098e-05, | |
| "loss": -0.0, | |
| "num_tokens": 1237867.0, | |
| "reward": 0.5474408268928528, | |
| "reward_std": 0.048626385629177094, | |
| "rewards/reward_matching": 0.4194202125072479, | |
| "rewards/reward_object_count": 0.47894346714019775, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 996.0, | |
| "completions/mean_length": 274.921875, | |
| "completions/mean_terminated_length": 197.43103448275863, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 17.0, | |
| "grad_norm": 8.7667656771373e-05, | |
| "kl": 0.0016155529519892298, | |
| "learning_rate": 2.99992919618918e-05, | |
| "loss": 0.0013, | |
| "num_tokens": 1273126.0, | |
| "reward": 0.616933286190033, | |
| "reward_std": 0.06678177416324615, | |
| "rewards/reward_matching": 0.48520591855049133, | |
| "rewards/reward_object_count": 0.6290486454963684, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.21875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 832.0, | |
| "completions/mean_length": 384.578125, | |
| "completions/mean_terminated_length": 205.54, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 17.5, | |
| "grad_norm": 0.00012024820898659527, | |
| "kl": 0.0013034686198807321, | |
| "learning_rate": 2.999874127328748e-05, | |
| "loss": -0.0012, | |
| "num_tokens": 1315403.0, | |
| "reward": 0.549712061882019, | |
| "reward_std": 0.06744106113910675, | |
| "rewards/reward_matching": 0.44805556535720825, | |
| "rewards/reward_object_count": 0.43564340472221375, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 403.0, | |
| "completions/mean_length": 145.71875, | |
| "completions/mean_terminated_length": 131.77777777777777, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 18.0, | |
| "grad_norm": 8.172642992576584e-05, | |
| "kl": 0.004074121621670201, | |
| "learning_rate": 2.9998033254984483e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 1341753.0, | |
| "reward": 0.5836251974105835, | |
| "reward_std": 0.06786907464265823, | |
| "rewards/reward_matching": 0.478513240814209, | |
| "rewards/reward_object_count": 0.4982115924358368, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.28125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 929.0, | |
| "completions/mean_length": 419.25, | |
| "completions/mean_terminated_length": 182.6086956521739, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 18.5, | |
| "grad_norm": 0.00017777887114789337, | |
| "kl": 0.0012582261915667914, | |
| "learning_rate": 2.999716791440959e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 1388809.0, | |
| "reward": 0.533015251159668, | |
| "reward_std": 0.10117805004119873, | |
| "rewards/reward_matching": 0.4379459619522095, | |
| "rewards/reward_object_count": 0.38248807191848755, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.140625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 895.0, | |
| "completions/mean_length": 339.390625, | |
| "completions/mean_terminated_length": 227.36363636363637, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 19.0, | |
| "grad_norm": 0.00014843710232526064, | |
| "kl": 0.002032484859228134, | |
| "learning_rate": 2.9996145260639812e-05, | |
| "loss": 0.0027, | |
| "num_tokens": 1429474.0, | |
| "reward": 0.5366367697715759, | |
| "reward_std": 0.09246323257684708, | |
| "rewards/reward_matching": 0.4080054759979248, | |
| "rewards/reward_object_count": 0.47479236125946045, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 969.0, | |
| "completions/mean_length": 370.875, | |
| "completions/mean_terminated_length": 220.15384615384616, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 19.5, | |
| "grad_norm": 0.00011248727241763845, | |
| "kl": 0.0019941654754802585, | |
| "learning_rate": 2.9994965304402304e-05, | |
| "loss": 0.0011, | |
| "num_tokens": 1470554.0, | |
| "reward": 0.6027147769927979, | |
| "reward_std": 0.06692709028720856, | |
| "rewards/reward_matching": 0.4802256226539612, | |
| "rewards/reward_object_count": 0.5728965997695923, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 999.0, | |
| "completions/mean_length": 261.140625, | |
| "completions/mean_terminated_length": 152.16071428571428, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 20.0, | |
| "grad_norm": 7.97496541053988e-05, | |
| "kl": 0.0031269127648556605, | |
| "learning_rate": 2.999362805807425e-05, | |
| "loss": 0.0015, | |
| "num_tokens": 1509091.0, | |
| "reward": 0.6954025030136108, | |
| "reward_std": 0.032408565282821655, | |
| "rewards/reward_matching": 0.6296951770782471, | |
| "rewards/reward_object_count": 0.587926983833313, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 337.0, | |
| "completions/mean_length": 104.671875, | |
| "completions/mean_terminated_length": 90.07936507936508, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 20.5, | |
| "grad_norm": 7.970151636982337e-05, | |
| "kl": 0.0068569304421544075, | |
| "learning_rate": 2.9992133535682725e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 1537934.0, | |
| "reward": 0.6217859983444214, | |
| "reward_std": 0.04923363775014877, | |
| "rewards/reward_matching": 0.4929783046245575, | |
| "rewards/reward_object_count": 0.6299948692321777, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.109375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 601.0, | |
| "completions/mean_length": 261.578125, | |
| "completions/mean_terminated_length": 167.94736842105263, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 21.0, | |
| "grad_norm": 0.0001107210700865835, | |
| "kl": 0.002925441396655515, | |
| "learning_rate": 2.9990481752904566e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 1572659.0, | |
| "reward": 0.5629563331604004, | |
| "reward_std": 0.059148214757442474, | |
| "rewards/reward_matching": 0.49224740266799927, | |
| "rewards/reward_object_count": 0.35366469621658325, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 964.0, | |
| "completions/mean_length": 428.6875, | |
| "completions/mean_terminated_length": 318.44444444444446, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 21.5, | |
| "grad_norm": 8.503717253915966e-05, | |
| "kl": 0.002438544644974172, | |
| "learning_rate": 2.9988672727066197e-05, | |
| "loss": 0.0011, | |
| "num_tokens": 1618719.0, | |
| "reward": 0.5435788631439209, | |
| "reward_std": 0.05614367127418518, | |
| "rewards/reward_matching": 0.45421895384788513, | |
| "rewards/reward_object_count": 0.3552376627922058, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 425.0, | |
| "completions/max_terminated_length": 425.0, | |
| "completions/mean_length": 129.625, | |
| "completions/mean_terminated_length": 129.625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 22.0, | |
| "grad_norm": 0.00011053122580051422, | |
| "kl": 0.005899429728742689, | |
| "learning_rate": 2.9986706477143436e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 1646919.0, | |
| "reward": 0.6216758489608765, | |
| "reward_std": 0.07677946984767914, | |
| "rewards/reward_matching": 0.5217663645744324, | |
| "rewards/reward_object_count": 0.5430803298950195, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 601.0, | |
| "completions/mean_length": 424.140625, | |
| "completions/mean_terminated_length": 151.47727272727272, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 22.5, | |
| "grad_norm": 0.000124470898299478, | |
| "kl": 0.0022016632283339277, | |
| "learning_rate": 2.9984583023761318e-05, | |
| "loss": 0.002, | |
| "num_tokens": 1696528.0, | |
| "reward": 0.5888717174530029, | |
| "reward_std": 0.041133634746074677, | |
| "rewards/reward_matching": 0.5104244947433472, | |
| "rewards/reward_object_count": 0.41308486461639404, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.109375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 957.0, | |
| "completions/mean_length": 330.796875, | |
| "completions/mean_terminated_length": 245.66666666666666, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 23.0, | |
| "grad_norm": 0.00011844925757031888, | |
| "kl": 0.0031491442350670695, | |
| "learning_rate": 2.998230238919386e-05, | |
| "loss": 0.0021, | |
| "num_tokens": 1733763.0, | |
| "reward": 0.5587866902351379, | |
| "reward_std": 0.05619703605771065, | |
| "rewards/reward_matching": 0.42642295360565186, | |
| "rewards/reward_object_count": 0.5146645307540894, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 236.0, | |
| "completions/mean_length": 117.53125, | |
| "completions/mean_terminated_length": 88.29032258064517, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 23.5, | |
| "grad_norm": 0.0001667703763814643, | |
| "kl": 0.007947034202516079, | |
| "learning_rate": 2.9979864597363846e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 1765349.0, | |
| "reward": 0.7034145593643188, | |
| "reward_std": 0.08120490610599518, | |
| "rewards/reward_matching": 0.6374689340591431, | |
| "rewards/reward_object_count": 0.6202906370162964, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 849.0, | |
| "completions/mean_length": 246.625, | |
| "completions/mean_terminated_length": 194.8, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 24.0, | |
| "grad_norm": 0.00029066766728647053, | |
| "kl": 0.00509595571202226, | |
| "learning_rate": 2.9977269673842554e-05, | |
| "loss": -0.0019, | |
| "num_tokens": 1801037.0, | |
| "reward": 0.616760790348053, | |
| "reward_std": 0.10064470022916794, | |
| "rewards/reward_matching": 0.5000989437103271, | |
| "rewards/reward_object_count": 0.6303821802139282, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.078125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 539.0, | |
| "completions/mean_length": 208.234375, | |
| "completions/mean_terminated_length": 139.10169491525423, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 24.5, | |
| "grad_norm": 0.0001182894702651538, | |
| "kl": 0.005687805562047288, | |
| "learning_rate": 2.997451764584951e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 1836188.0, | |
| "reward": 0.5805012583732605, | |
| "reward_std": 0.06066010519862175, | |
| "rewards/reward_matching": 0.4713587164878845, | |
| "rewards/reward_object_count": 0.48843005299568176, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 311.0, | |
| "completions/mean_length": 139.796875, | |
| "completions/mean_terminated_length": 125.76190476190476, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 25.0, | |
| "grad_norm": 5.7654466218082234e-05, | |
| "kl": 0.006528859958052635, | |
| "learning_rate": 2.9971608542252175e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 1866319.0, | |
| "reward": 0.6885979175567627, | |
| "reward_std": 0.039257895201444626, | |
| "rewards/reward_matching": 0.5929476618766785, | |
| "rewards/reward_object_count": 0.6641466617584229, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 821.0, | |
| "completions/mean_length": 204.296875, | |
| "completions/mean_terminated_length": 163.98360655737704, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 25.5, | |
| "grad_norm": 7.800667663104832e-05, | |
| "kl": 0.0062508382252417505, | |
| "learning_rate": 2.9968542393565674e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 1897058.0, | |
| "reward": 0.6192189455032349, | |
| "reward_std": 0.052703116089105606, | |
| "rewards/reward_matching": 0.5146477818489075, | |
| "rewards/reward_object_count": 0.5521511435508728, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 858.0, | |
| "completions/mean_length": 370.859375, | |
| "completions/mean_terminated_length": 249.90740740740742, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 26.0, | |
| "grad_norm": 7.006085070315748e-05, | |
| "kl": 0.00459836726076901, | |
| "learning_rate": 2.996531923195246e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 1939097.0, | |
| "reward": 0.5544993877410889, | |
| "reward_std": 0.04202552139759064, | |
| "rewards/reward_matching": 0.4601183831691742, | |
| "rewards/reward_object_count": 0.3921419382095337, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.203125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1008.0, | |
| "completions/mean_length": 561.46875, | |
| "completions/mean_terminated_length": 443.5686274509804, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 26.5, | |
| "grad_norm": 0.00015066047490108758, | |
| "kl": 0.003497203520964831, | |
| "learning_rate": 2.996193909122197e-05, | |
| "loss": 0.0023, | |
| "num_tokens": 1993335.0, | |
| "reward": 0.5232954025268555, | |
| "reward_std": 0.05474445968866348, | |
| "rewards/reward_matching": 0.45413488149642944, | |
| "rewards/reward_object_count": 0.2540724277496338, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 349.0, | |
| "completions/max_terminated_length": 349.0, | |
| "completions/mean_length": 104.953125, | |
| "completions/mean_terminated_length": 104.953125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 27.0, | |
| "grad_norm": 0.00012995678116567433, | |
| "kl": 0.00867222691886127, | |
| "learning_rate": 2.995840200683028e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 2020596.0, | |
| "reward": 0.6695042848587036, | |
| "reward_std": 0.08031092584133148, | |
| "rewards/reward_matching": 0.522784948348999, | |
| "rewards/reward_object_count": 0.7791666984558105, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 731.0, | |
| "completions/mean_length": 191.703125, | |
| "completions/mean_terminated_length": 178.4920634920635, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 27.5, | |
| "grad_norm": 0.00017028290312737226, | |
| "kl": 0.006999723031185567, | |
| "learning_rate": 2.995470801587973e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 2054049.0, | |
| "reward": 0.6205588579177856, | |
| "reward_std": 0.10116489231586456, | |
| "rewards/reward_matching": 0.5161659717559814, | |
| "rewards/reward_object_count": 0.5699214935302734, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 159.0, | |
| "completions/max_terminated_length": 159.0, | |
| "completions/mean_length": 68.734375, | |
| "completions/mean_terminated_length": 68.734375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 28.0, | |
| "grad_norm": 6.421493890229613e-05, | |
| "kl": 0.012037722510285676, | |
| "learning_rate": 2.9950857157118544e-05, | |
| "loss": -0.0, | |
| "num_tokens": 2078032.0, | |
| "reward": 0.7118315696716309, | |
| "reward_std": 0.04499781131744385, | |
| "rewards/reward_matching": 0.5969762802124023, | |
| "rewards/reward_object_count": 0.7682291865348816, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 997.0, | |
| "completions/mean_length": 200.46875, | |
| "completions/mean_terminated_length": 173.90322580645162, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 28.5, | |
| "grad_norm": 5.707196032744832e-05, | |
| "kl": 0.007291222224012017, | |
| "learning_rate": 2.9946849470940395e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 2108526.0, | |
| "reward": 0.6197296380996704, | |
| "reward_std": 0.03817511722445488, | |
| "rewards/reward_matching": 0.5467733144760132, | |
| "rewards/reward_object_count": 0.4583281874656677, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 463.0, | |
| "completions/mean_length": 168.453125, | |
| "completions/mean_terminated_length": 140.8548387096774, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "epoch": 29.0, | |
| "grad_norm": 0.00027134420815855265, | |
| "kl": 0.009315767034422606, | |
| "learning_rate": 2.9942684999384034e-05, | |
| "loss": -0.0006, | |
| "num_tokens": 2139531.0, | |
| "reward": 0.5399819612503052, | |
| "reward_std": 0.0870380625128746, | |
| "rewards/reward_matching": 0.4160889685153961, | |
| "rewards/reward_object_count": 0.46726763248443604, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 599.0, | |
| "completions/mean_length": 225.203125, | |
| "completions/mean_terminated_length": 185.91803278688525, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 29.5, | |
| "grad_norm": 7.844467472750694e-05, | |
| "kl": 0.007104447286110371, | |
| "learning_rate": 2.993836378613278e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 2172568.0, | |
| "reward": 0.6556448936462402, | |
| "reward_std": 0.04675156623125076, | |
| "rewards/reward_matching": 0.5574519634246826, | |
| "rewards/reward_object_count": 0.6058685183525085, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 995.0, | |
| "completions/mean_length": 278.96875, | |
| "completions/mean_terminated_length": 267.14285714285717, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 30.0, | |
| "grad_norm": 0.00016339456487912685, | |
| "kl": 0.007064116362016648, | |
| "learning_rate": 2.993388587651412e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 2209366.0, | |
| "reward": 0.5095100402832031, | |
| "reward_std": 0.0792592391371727, | |
| "rewards/reward_matching": 0.43418169021606445, | |
| "rewards/reward_object_count": 0.2762550711631775, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 468.0, | |
| "completions/max_terminated_length": 468.0, | |
| "completions/mean_length": 142.390625, | |
| "completions/mean_terminated_length": 142.390625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 30.5, | |
| "grad_norm": 9.066959319170564e-05, | |
| "kl": 0.009399229020345956, | |
| "learning_rate": 2.992925131749921e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 2239663.0, | |
| "reward": 0.7246487140655518, | |
| "reward_std": 0.050000160932540894, | |
| "rewards/reward_matching": 0.6537588834762573, | |
| "rewards/reward_object_count": 0.6619668006896973, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 943.0, | |
| "completions/mean_length": 273.4375, | |
| "completions/mean_terminated_length": 249.2258064516129, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 31.0, | |
| "grad_norm": 0.00016052013961598277, | |
| "kl": 0.0073067472549155354, | |
| "learning_rate": 2.9924460157702378e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 2274827.0, | |
| "reward": 0.5953838229179382, | |
| "reward_std": 0.06788177788257599, | |
| "rewards/reward_matching": 0.49889495968818665, | |
| "rewards/reward_object_count": 0.4958592653274536, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 539.0, | |
| "completions/max_terminated_length": 539.0, | |
| "completions/mean_length": 217.921875, | |
| "completions/mean_terminated_length": 217.921875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 31.5, | |
| "grad_norm": 8.415069169132039e-05, | |
| "kl": 0.007433710154145956, | |
| "learning_rate": 2.991951244738063e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 2307398.0, | |
| "reward": 0.5583871006965637, | |
| "reward_std": 0.04484350234270096, | |
| "rewards/reward_matching": 0.45415085554122925, | |
| "rewards/reward_object_count": 0.4294828772544861, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.078125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 833.0, | |
| "completions/mean_length": 195.3125, | |
| "completions/mean_terminated_length": 125.08474576271186, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 32.0, | |
| "grad_norm": 0.00020136036619078368, | |
| "kl": 0.009423179202713072, | |
| "learning_rate": 2.9914408238433095e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 2336602.0, | |
| "reward": 0.6332944631576538, | |
| "reward_std": 0.060811009258031845, | |
| "rewards/reward_matching": 0.4973873496055603, | |
| "rewards/reward_object_count": 0.6743100881576538, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 667.0, | |
| "completions/mean_length": 221.671875, | |
| "completions/mean_terminated_length": 195.79032258064515, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 32.5, | |
| "grad_norm": 0.0001210306872962974, | |
| "kl": 0.008098234015051275, | |
| "learning_rate": 2.990914758440052e-05, | |
| "loss": 0.0, | |
| "num_tokens": 2368773.0, | |
| "reward": 0.6689934730529785, | |
| "reward_std": 0.07700366526842117, | |
| "rewards/reward_matching": 0.567918062210083, | |
| "rewards/reward_object_count": 0.6568382382392883, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.078125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 832.0, | |
| "completions/mean_length": 289.75, | |
| "completions/mean_terminated_length": 227.52542372881356, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "epoch": 33.0, | |
| "grad_norm": 0.00016435940051451325, | |
| "kl": 0.007474837242625654, | |
| "learning_rate": 2.9903730540464668e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 2406901.0, | |
| "reward": 0.5091193914413452, | |
| "reward_std": 0.05931903421878815, | |
| "rewards/reward_matching": 0.43151775002479553, | |
| "rewards/reward_object_count": 0.2666684687137604, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 799.0, | |
| "completions/mean_length": 187.078125, | |
| "completions/mean_terminated_length": 160.08064516129033, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 33.5, | |
| "grad_norm": 8.364054519915953e-05, | |
| "kl": 0.010546724603045732, | |
| "learning_rate": 2.9898157163447767e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 2440058.0, | |
| "reward": 0.6797939538955688, | |
| "reward_std": 0.04941866174340248, | |
| "rewards/reward_matching": 0.5802963376045227, | |
| "rewards/reward_object_count": 0.6580805778503418, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 964.0, | |
| "completions/mean_length": 253.125, | |
| "completions/mean_terminated_length": 215.21311475409837, | |
| "completions/min_length": 34.0, | |
| "completions/min_terminated_length": 34.0, | |
| "epoch": 34.0, | |
| "grad_norm": 0.0002845938433893025, | |
| "kl": 0.01157232653349638, | |
| "learning_rate": 2.9892427511811912e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 2474562.0, | |
| "reward": 0.5502975583076477, | |
| "reward_std": 0.09778749942779541, | |
| "rewards/reward_matching": 0.43594658374786377, | |
| "rewards/reward_object_count": 0.45927298069000244, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 599.0, | |
| "completions/mean_length": 215.09375, | |
| "completions/mean_terminated_length": 189.0, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 34.5, | |
| "grad_norm": 0.00017123304132837802, | |
| "kl": 0.010649158153682947, | |
| "learning_rate": 2.9886541645658435e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 2507592.0, | |
| "reward": 0.6654698848724365, | |
| "reward_std": 0.05868534743785858, | |
| "rewards/reward_matching": 0.5466117858886719, | |
| "rewards/reward_object_count": 0.6875138282775879, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 959.0, | |
| "completions/mean_length": 212.8125, | |
| "completions/mean_terminated_length": 158.73333333333332, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "epoch": 35.0, | |
| "grad_norm": 0.001203745836392045, | |
| "kl": 0.027731457608751953, | |
| "learning_rate": 2.9880499626727284e-05, | |
| "loss": -0.0008, | |
| "num_tokens": 2542396.0, | |
| "reward": 0.6588045954704285, | |
| "reward_std": 0.060804709792137146, | |
| "rewards/reward_matching": 0.5325086116790771, | |
| "rewards/reward_object_count": 0.7121223211288452, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 691.0, | |
| "completions/mean_length": 220.875, | |
| "completions/mean_terminated_length": 194.96774193548387, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 35.5, | |
| "grad_norm": 0.00018338189693167806, | |
| "kl": 0.011799431755207479, | |
| "learning_rate": 2.9874301518396377e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 2578036.0, | |
| "reward": 0.6314491033554077, | |
| "reward_std": 0.07501392066478729, | |
| "rewards/reward_matching": 0.5245179533958435, | |
| "rewards/reward_object_count": 0.5836920142173767, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 501.0, | |
| "completions/mean_length": 205.171875, | |
| "completions/mean_terminated_length": 178.75806451612902, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 36.0, | |
| "grad_norm": 0.00012583017814904451, | |
| "kl": 0.010461569239851087, | |
| "learning_rate": 2.986794738568094e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 2608191.0, | |
| "reward": 0.613935112953186, | |
| "reward_std": 0.06065426021814346, | |
| "rewards/reward_matching": 0.49310362339019775, | |
| "rewards/reward_object_count": 0.5903645753860474, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 767.0, | |
| "completions/mean_length": 74.953125, | |
| "completions/mean_terminated_length": 59.888888888888886, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 36.5, | |
| "grad_norm": 9.753658378031105e-05, | |
| "kl": 0.018295559682883322, | |
| "learning_rate": 2.9861437295232825e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 2634492.0, | |
| "reward": 0.7185924649238586, | |
| "reward_std": 0.06972639262676239, | |
| "rewards/reward_matching": 0.6025491952896118, | |
| "rewards/reward_object_count": 0.8009397983551025, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 658.0, | |
| "completions/mean_length": 270.625, | |
| "completions/mean_terminated_length": 233.5737704918033, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "epoch": 37.0, | |
| "grad_norm": 0.0003199022903572768, | |
| "kl": 0.010636754508595914, | |
| "learning_rate": 2.9854771315339787e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 2672036.0, | |
| "reward": 0.5783323049545288, | |
| "reward_std": 0.0985267236828804, | |
| "rewards/reward_matching": 0.4558379054069519, | |
| "rewards/reward_object_count": 0.5397727489471436, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.078125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 843.0, | |
| "completions/mean_length": 384.5625, | |
| "completions/mean_terminated_length": 330.3728813559322, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 37.5, | |
| "grad_norm": 0.00016259767289739102, | |
| "kl": 0.010080361389555037, | |
| "learning_rate": 2.984794951592481e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 2714632.0, | |
| "reward": 0.4983637034893036, | |
| "reward_std": 0.06861061602830887, | |
| "rewards/reward_matching": 0.423081636428833, | |
| "rewards/reward_object_count": 0.2538233995437622, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 105.0, | |
| "completions/max_terminated_length": 105.0, | |
| "completions/mean_length": 51.953125, | |
| "completions/mean_terminated_length": 51.953125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 38.0, | |
| "grad_norm": 5.508283356903121e-05, | |
| "kl": 0.020864711608737707, | |
| "learning_rate": 2.984097196854534e-05, | |
| "loss": -0.0, | |
| "num_tokens": 2738501.0, | |
| "reward": 0.7815486192703247, | |
| "reward_std": 0.04100874066352844, | |
| "rewards/reward_matching": 0.6749768853187561, | |
| "rewards/reward_object_count": 0.8828125, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 709.0, | |
| "completions/mean_length": 244.484375, | |
| "completions/mean_terminated_length": 219.33870967741936, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 38.5, | |
| "grad_norm": 8.971132774604484e-05, | |
| "kl": 0.009314798924606293, | |
| "learning_rate": 2.9833838746392544e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 2771492.0, | |
| "reward": 0.6448056697845459, | |
| "reward_std": 0.047796234488487244, | |
| "rewards/reward_matching": 0.5422253012657166, | |
| "rewards/reward_object_count": 0.5973524451255798, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 568.0, | |
| "completions/mean_length": 236.46875, | |
| "completions/mean_terminated_length": 183.96666666666667, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 39.0, | |
| "grad_norm": 0.00015159579925239086, | |
| "kl": 0.01285445858957246, | |
| "learning_rate": 2.982654992429056e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 2808130.0, | |
| "reward": 0.6628507971763611, | |
| "reward_std": 0.061860181391239166, | |
| "rewards/reward_matching": 0.564445972442627, | |
| "rewards/reward_object_count": 0.6365411281585693, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 733.0, | |
| "completions/mean_length": 224.75, | |
| "completions/mean_terminated_length": 185.44262295081967, | |
| "completions/min_length": 70.0, | |
| "completions/min_terminated_length": 70.0, | |
| "epoch": 39.5, | |
| "grad_norm": 0.0001908452104544267, | |
| "kl": 0.015527774463407695, | |
| "learning_rate": 2.981910557869566e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 2843378.0, | |
| "reward": 0.5593058466911316, | |
| "reward_std": 0.07343290746212006, | |
| "rewards/reward_matching": 0.48125869035720825, | |
| "rewards/reward_object_count": 0.3527531325817108, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 600.0, | |
| "completions/max_terminated_length": 600.0, | |
| "completions/mean_length": 139.65625, | |
| "completions/mean_terminated_length": 139.65625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 40.0, | |
| "grad_norm": 0.00013636126823257655, | |
| "kl": 0.01637664856389165, | |
| "learning_rate": 2.981150578769553e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 2870300.0, | |
| "reward": 0.6560741066932678, | |
| "reward_std": 0.06251361966133118, | |
| "rewards/reward_matching": 0.5192815065383911, | |
| "rewards/reward_object_count": 0.7225260734558105, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 197.0, | |
| "completions/max_terminated_length": 197.0, | |
| "completions/mean_length": 81.265625, | |
| "completions/mean_terminated_length": 81.265625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 40.5, | |
| "grad_norm": 0.00010307910270057619, | |
| "kl": 0.01742720091715455, | |
| "learning_rate": 2.980375063100836e-05, | |
| "loss": -0.0, | |
| "num_tokens": 2895725.0, | |
| "reward": 0.7209770083427429, | |
| "reward_std": 0.04894069582223892, | |
| "rewards/reward_matching": 0.6191629767417908, | |
| "rewards/reward_object_count": 0.7473958730697632, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 964.0, | |
| "completions/mean_length": 190.921875, | |
| "completions/mean_terminated_length": 177.6984126984127, | |
| "completions/min_length": 38.0, | |
| "completions/min_terminated_length": 38.0, | |
| "epoch": 41.0, | |
| "grad_norm": 6.521846080431715e-05, | |
| "kl": 0.01442325720563531, | |
| "learning_rate": 2.979584018998209e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 2928488.0, | |
| "reward": 0.5859934091567993, | |
| "reward_std": 0.032855767756700516, | |
| "rewards/reward_matching": 0.503061056137085, | |
| "rewards/reward_object_count": 0.42078372836112976, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 927.0, | |
| "completions/mean_length": 206.546875, | |
| "completions/mean_terminated_length": 121.98275862068965, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 41.5, | |
| "grad_norm": 0.00013890951231587678, | |
| "kl": 0.011370213003829122, | |
| "learning_rate": 2.97877745475935e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 2962251.0, | |
| "reward": 0.6619434356689453, | |
| "reward_std": 0.05908970534801483, | |
| "rewards/reward_matching": 0.545287013053894, | |
| "rewards/reward_object_count": 0.6894807815551758, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 919.0, | |
| "completions/mean_length": 195.953125, | |
| "completions/mean_terminated_length": 169.24193548387098, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 42.0, | |
| "grad_norm": 0.00014514043868985027, | |
| "kl": 0.01611044444143772, | |
| "learning_rate": 2.9779553788447358e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 2995016.0, | |
| "reward": 0.5909014344215393, | |
| "reward_std": 0.07796993851661682, | |
| "rewards/reward_matching": 0.5025642514228821, | |
| "rewards/reward_object_count": 0.4624394476413727, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 815.0, | |
| "completions/mean_length": 312.546875, | |
| "completions/mean_terminated_length": 277.55737704918033, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 42.5, | |
| "grad_norm": 0.00012988250819034874, | |
| "kl": 0.012926546158269048, | |
| "learning_rate": 2.977117799877554e-05, | |
| "loss": 0.0012, | |
| "num_tokens": 3033323.0, | |
| "reward": 0.5925983190536499, | |
| "reward_std": 0.056041646748781204, | |
| "rewards/reward_matching": 0.49923017621040344, | |
| "rewards/reward_object_count": 0.46530094742774963, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 577.0, | |
| "completions/max_terminated_length": 577.0, | |
| "completions/mean_length": 187.328125, | |
| "completions/mean_terminated_length": 187.328125, | |
| "completions/min_length": 9.0, | |
| "completions/min_terminated_length": 9.0, | |
| "epoch": 43.0, | |
| "grad_norm": 0.00040236019412986934, | |
| "kl": 0.017144598648883402, | |
| "learning_rate": 2.9762647266436115e-05, | |
| "loss": -0.0008, | |
| "num_tokens": 3063296.0, | |
| "reward": 0.5428475141525269, | |
| "reward_std": 0.09201589226722717, | |
| "rewards/reward_matching": 0.4472660422325134, | |
| "rewards/reward_object_count": 0.41931426525115967, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 311.0, | |
| "completions/max_terminated_length": 311.0, | |
| "completions/mean_length": 88.671875, | |
| "completions/mean_terminated_length": 88.671875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 43.5, | |
| "grad_norm": 8.879049710230902e-05, | |
| "kl": 0.016023017466068268, | |
| "learning_rate": 2.9753961680912432e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3085995.0, | |
| "reward": 0.687000036239624, | |
| "reward_std": 0.049605756998062134, | |
| "rewards/reward_matching": 0.5584548711776733, | |
| "rewards/reward_object_count": 0.7596354484558105, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 729.0, | |
| "completions/mean_length": 308.359375, | |
| "completions/mean_terminated_length": 273.1639344262295, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 44.0, | |
| "grad_norm": 0.00044992516632191837, | |
| "kl": 0.02378708287142217, | |
| "learning_rate": 2.9745121333312166e-05, | |
| "loss": -0.0027, | |
| "num_tokens": 3123394.0, | |
| "reward": 0.4918820559978485, | |
| "reward_std": 0.11219721287488937, | |
| "rewards/reward_matching": 0.3640925884246826, | |
| "rewards/reward_object_count": 0.4608825445175171, | |
| "rewards/reward_parseable": 0.90625, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 931.0, | |
| "completions/mean_length": 241.46875, | |
| "completions/mean_terminated_length": 189.3, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 44.5, | |
| "grad_norm": 8.519666153006256e-05, | |
| "kl": 0.012101836502552032, | |
| "learning_rate": 2.9736126316366385e-05, | |
| "loss": 0.0, | |
| "num_tokens": 3158112.0, | |
| "reward": 0.6382660865783691, | |
| "reward_std": 0.04055645316839218, | |
| "rewards/reward_matching": 0.4989316463470459, | |
| "rewards/reward_object_count": 0.6945353746414185, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 434.0, | |
| "completions/mean_length": 164.859375, | |
| "completions/mean_terminated_length": 151.22222222222223, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 45.0, | |
| "grad_norm": 0.0005300024640746415, | |
| "kl": 0.029471338726580143, | |
| "learning_rate": 2.9726976724428563e-05, | |
| "loss": -0.0012, | |
| "num_tokens": 3189527.0, | |
| "reward": 0.6498422622680664, | |
| "reward_std": 0.13978248834609985, | |
| "rewards/reward_matching": 0.5533084869384766, | |
| "rewards/reward_object_count": 0.6361607313156128, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 238.0, | |
| "completions/max_terminated_length": 238.0, | |
| "completions/mean_length": 119.578125, | |
| "completions/mean_terminated_length": 119.578125, | |
| "completions/min_length": 70.0, | |
| "completions/min_terminated_length": 70.0, | |
| "epoch": 45.5, | |
| "grad_norm": 0.00018720145453698933, | |
| "kl": 0.02450899383984506, | |
| "learning_rate": 2.9717672653473588e-05, | |
| "loss": -0.0006, | |
| "num_tokens": 3217724.0, | |
| "reward": 0.6467303037643433, | |
| "reward_std": 0.05769674479961395, | |
| "rewards/reward_matching": 0.5479917526245117, | |
| "rewards/reward_object_count": 0.5896763205528259, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 337.0, | |
| "completions/mean_length": 99.203125, | |
| "completions/mean_terminated_length": 84.52380952380952, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 46.0, | |
| "grad_norm": 7.427236414514482e-05, | |
| "kl": 0.022152320947498083, | |
| "learning_rate": 2.9708214201096758e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3242377.0, | |
| "reward": 0.6481877565383911, | |
| "reward_std": 0.06066766381263733, | |
| "rewards/reward_matching": 0.4977788031101227, | |
| "rewards/reward_object_count": 0.7476025223731995, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 729.0, | |
| "completions/max_terminated_length": 729.0, | |
| "completions/mean_length": 219.96875, | |
| "completions/mean_terminated_length": 219.96875, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 46.5, | |
| "grad_norm": 0.00016481881903018802, | |
| "kl": 0.016606852994300425, | |
| "learning_rate": 2.9698601466512767e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3275079.0, | |
| "reward": 0.5845397710800171, | |
| "reward_std": 0.04967654123902321, | |
| "rewards/reward_matching": 0.5262070894241333, | |
| "rewards/reward_object_count": 0.3440776765346527, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 766.0, | |
| "completions/mean_length": 309.375, | |
| "completions/mean_terminated_length": 274.2295081967213, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 47.0, | |
| "grad_norm": 0.00014561145508196205, | |
| "kl": 0.014288356876932085, | |
| "learning_rate": 2.9688834550554647e-05, | |
| "loss": 0.0009, | |
| "num_tokens": 3315103.0, | |
| "reward": 0.6348440647125244, | |
| "reward_std": 0.07563169300556183, | |
| "rewards/reward_matching": 0.5213068723678589, | |
| "rewards/reward_object_count": 0.6102994084358215, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 425.0, | |
| "completions/max_terminated_length": 425.0, | |
| "completions/mean_length": 139.234375, | |
| "completions/mean_terminated_length": 139.234375, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 47.5, | |
| "grad_norm": 0.00018322512914892286, | |
| "kl": 0.023316799197345972, | |
| "learning_rate": 2.9678913555672733e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 3343918.0, | |
| "reward": 0.6510435342788696, | |
| "reward_std": 0.09565050899982452, | |
| "rewards/reward_matching": 0.5266523361206055, | |
| "rewards/reward_object_count": 0.6908854246139526, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 402.0, | |
| "completions/mean_length": 134.640625, | |
| "completions/mean_terminated_length": 120.52380952380952, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 48.0, | |
| "grad_norm": 0.00011491947225295007, | |
| "kl": 0.021518109366297722, | |
| "learning_rate": 2.966883858593356e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 3373719.0, | |
| "reward": 0.7392737865447998, | |
| "reward_std": 0.04068930447101593, | |
| "rewards/reward_matching": 0.6502777338027954, | |
| "rewards/reward_object_count": 0.7455357313156128, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 197.0, | |
| "completions/max_terminated_length": 197.0, | |
| "completions/mean_length": 80.046875, | |
| "completions/mean_terminated_length": 80.046875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 48.5, | |
| "grad_norm": 9.114377462537959e-05, | |
| "kl": 0.03106052055954933, | |
| "learning_rate": 2.9658609747018796e-05, | |
| "loss": 0.0, | |
| "num_tokens": 3400666.0, | |
| "reward": 0.7638974785804749, | |
| "reward_std": 0.036661915481090546, | |
| "rewards/reward_matching": 0.6776763200759888, | |
| "rewards/reward_object_count": 0.7864583730697632, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 760.0, | |
| "completions/max_terminated_length": 760.0, | |
| "completions/mean_length": 195.453125, | |
| "completions/mean_terminated_length": 195.453125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 49.0, | |
| "grad_norm": 8.127772161969915e-05, | |
| "kl": 0.017332423012703657, | |
| "learning_rate": 2.964822714622412e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 3430519.0, | |
| "reward": 0.6093304753303528, | |
| "reward_std": 0.046209633350372314, | |
| "rewards/reward_matching": 0.49052512645721436, | |
| "rewards/reward_object_count": 0.5750769972801208, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 832.0, | |
| "completions/mean_length": 303.984375, | |
| "completions/mean_terminated_length": 280.758064516129, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 49.5, | |
| "grad_norm": 0.00037192818126641214, | |
| "kl": 0.02439494035206735, | |
| "learning_rate": 2.9637690892458103e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 3470198.0, | |
| "reward": 0.5828637480735779, | |
| "reward_std": 0.1055147647857666, | |
| "rewards/reward_matching": 0.4695713520050049, | |
| "rewards/reward_object_count": 0.53685462474823, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 539.0, | |
| "completions/mean_length": 218.09375, | |
| "completions/mean_terminated_length": 205.3015873015873, | |
| "completions/min_length": 14.0, | |
| "completions/min_terminated_length": 14.0, | |
| "epoch": 50.0, | |
| "grad_norm": 0.0021957538556307554, | |
| "kl": 0.06775743188336492, | |
| "learning_rate": 2.962700109624106e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 3501820.0, | |
| "reward": 0.6071096658706665, | |
| "reward_std": 0.09001424908638, | |
| "rewards/reward_matching": 0.4763699769973755, | |
| "rewards/reward_object_count": 0.6220631003379822, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 273.0, | |
| "completions/max_terminated_length": 273.0, | |
| "completions/mean_length": 96.8125, | |
| "completions/mean_terminated_length": 96.8125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 50.5, | |
| "grad_norm": 7.189081952674314e-05, | |
| "kl": 0.026759767439216375, | |
| "learning_rate": 2.961615786970389e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 3526640.0, | |
| "reward": 0.6088296175003052, | |
| "reward_std": 0.042443305253982544, | |
| "rewards/reward_matching": 0.4899764060974121, | |
| "rewards/reward_object_count": 0.57421875, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 633.0, | |
| "completions/mean_length": 274.484375, | |
| "completions/mean_terminated_length": 250.30645161290323, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 51.0, | |
| "grad_norm": 0.0001283042220165953, | |
| "kl": 0.017867632559500635, | |
| "learning_rate": 2.960516132658692e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 3564751.0, | |
| "reward": 0.6425777673721313, | |
| "reward_std": 0.05326495319604874, | |
| "rewards/reward_matching": 0.545784592628479, | |
| "rewards/reward_object_count": 0.5755347013473511, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 910.0, | |
| "completions/mean_length": 265.46875, | |
| "completions/mean_terminated_length": 241.0, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 51.5, | |
| "grad_norm": 0.0001271759974770248, | |
| "kl": 0.016812809044495225, | |
| "learning_rate": 2.9594011582238672e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 3601005.0, | |
| "reward": 0.5850105285644531, | |
| "reward_std": 0.048566583544015884, | |
| "rewards/reward_matching": 0.4884982705116272, | |
| "rewards/reward_object_count": 0.4595579504966736, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 957.0, | |
| "completions/mean_length": 153.640625, | |
| "completions/mean_terminated_length": 110.8360655737705, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 52.0, | |
| "grad_norm": 9.4698800239712e-05, | |
| "kl": 0.02293774695135653, | |
| "learning_rate": 2.95827087536147e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 3632342.0, | |
| "reward": 0.7446116209030151, | |
| "reward_std": 0.04331940785050392, | |
| "rewards/reward_matching": 0.6290016770362854, | |
| "rewards/reward_object_count": 0.8360530138015747, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 615.0, | |
| "completions/mean_length": 183.75, | |
| "completions/mean_terminated_length": 156.6451612903226, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 52.5, | |
| "grad_norm": 0.00010602272232063115, | |
| "kl": 0.021552881691604853, | |
| "learning_rate": 2.9571252959276313e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 3662726.0, | |
| "reward": 0.579430341720581, | |
| "reward_std": 0.05669737607240677, | |
| "rewards/reward_matching": 0.4479978680610657, | |
| "rewards/reward_object_count": 0.5531580448150635, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 273.0, | |
| "completions/max_terminated_length": 273.0, | |
| "completions/mean_length": 74.1875, | |
| "completions/mean_terminated_length": 74.1875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 53.0, | |
| "grad_norm": 0.00010021215712185949, | |
| "kl": 0.0273100093472749, | |
| "learning_rate": 2.955964431938939e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 3688018.0, | |
| "reward": 0.7427883148193359, | |
| "reward_std": 0.05413203686475754, | |
| "rewards/reward_matching": 0.6073381900787354, | |
| "rewards/reward_object_count": 0.8919271230697632, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 311.0, | |
| "completions/max_terminated_length": 311.0, | |
| "completions/mean_length": 120.453125, | |
| "completions/mean_terminated_length": 120.453125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 53.5, | |
| "grad_norm": 0.00012529945524875075, | |
| "kl": 0.028699786867946386, | |
| "learning_rate": 2.9547882955723052e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3713391.0, | |
| "reward": 0.6109194755554199, | |
| "reward_std": 0.055304668843746185, | |
| "rewards/reward_matching": 0.5087559223175049, | |
| "rewards/reward_object_count": 0.528329610824585, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 502.0, | |
| "completions/mean_length": 183.0625, | |
| "completions/mean_terminated_length": 127.0, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "epoch": 54.0, | |
| "grad_norm": 0.00030532677192240953, | |
| "kl": 0.03329086292069405, | |
| "learning_rate": 2.953596899164846e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 3745331.0, | |
| "reward": 0.6400688886642456, | |
| "reward_std": 0.07565727084875107, | |
| "rewards/reward_matching": 0.5241180658340454, | |
| "rewards/reward_object_count": 0.6592400670051575, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 943.0, | |
| "completions/max_terminated_length": 943.0, | |
| "completions/mean_length": 246.90625, | |
| "completions/mean_terminated_length": 246.90625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 54.5, | |
| "grad_norm": 0.00014556830865330994, | |
| "kl": 0.01744238520041108, | |
| "learning_rate": 2.9523902552137436e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 3781357.0, | |
| "reward": 0.6747822761535645, | |
| "reward_std": 0.046912893652915955, | |
| "rewards/reward_matching": 0.6312285661697388, | |
| "rewards/reward_object_count": 0.48022598028182983, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 898.0, | |
| "completions/mean_length": 231.8125, | |
| "completions/mean_terminated_length": 219.23809523809524, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 55.0, | |
| "grad_norm": 0.0007359112496487796, | |
| "kl": 0.03722105058841407, | |
| "learning_rate": 2.951168376376124e-05, | |
| "loss": -0.0018, | |
| "num_tokens": 3814177.0, | |
| "reward": 0.5341554880142212, | |
| "reward_std": 0.12695415318012238, | |
| "rewards/reward_matching": 0.3953123688697815, | |
| "rewards/reward_object_count": 0.5473405718803406, | |
| "rewards/reward_parseable": 0.9375, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 767.0, | |
| "completions/mean_length": 190.8125, | |
| "completions/mean_terminated_length": 163.93548387096774, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 55.5, | |
| "grad_norm": 0.00014860746159683913, | |
| "kl": 0.020320012234151363, | |
| "learning_rate": 2.9499312754689168e-05, | |
| "loss": -0.0, | |
| "num_tokens": 3846933.0, | |
| "reward": 0.6570301055908203, | |
| "reward_std": 0.08989942818880081, | |
| "rewards/reward_matching": 0.5301545858383179, | |
| "rewards/reward_object_count": 0.7103118300437927, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 699.0, | |
| "completions/mean_length": 207.671875, | |
| "completions/mean_terminated_length": 194.71428571428572, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 56.0, | |
| "grad_norm": 0.00010286509495927021, | |
| "kl": 0.01653504034038633, | |
| "learning_rate": 2.9486789654687256e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 3879168.0, | |
| "reward": 0.6560405492782593, | |
| "reward_std": 0.05360962823033333, | |
| "rewards/reward_matching": 0.5268334746360779, | |
| "rewards/reward_object_count": 0.6997023820877075, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 539.0, | |
| "completions/mean_length": 260.84375, | |
| "completions/mean_terminated_length": 236.2258064516129, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 56.5, | |
| "grad_norm": 0.00011981173156527802, | |
| "kl": 0.01864371739793569, | |
| "learning_rate": 2.94741145951169e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 3915766.0, | |
| "reward": 0.5610636472702026, | |
| "reward_std": 0.04658506065607071, | |
| "rewards/reward_matching": 0.4559590816497803, | |
| "rewards/reward_object_count": 0.4374409317970276, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 767.0, | |
| "completions/mean_length": 225.21875, | |
| "completions/mean_terminated_length": 199.4516129032258, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 57.0, | |
| "grad_norm": 0.00016182979743462056, | |
| "kl": 0.01676144660450518, | |
| "learning_rate": 2.9461287708933475e-05, | |
| "loss": 0.0009, | |
| "num_tokens": 3952004.0, | |
| "reward": 0.740257740020752, | |
| "reward_std": 0.05227687209844589, | |
| "rewards/reward_matching": 0.6612553596496582, | |
| "rewards/reward_object_count": 0.717522919178009, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 272.0, | |
| "completions/max_terminated_length": 272.0, | |
| "completions/mean_length": 95.96875, | |
| "completions/mean_terminated_length": 95.96875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 57.5, | |
| "grad_norm": 8.677188452566043e-05, | |
| "kl": 0.02889580768533051, | |
| "learning_rate": 2.9448309130684944e-05, | |
| "loss": -0.0, | |
| "num_tokens": 3979330.0, | |
| "reward": 0.7038345336914062, | |
| "reward_std": 0.03829924017190933, | |
| "rewards/reward_matching": 0.6301881074905396, | |
| "rewards/reward_object_count": 0.6286086440086365, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 957.0, | |
| "completions/max_terminated_length": 957.0, | |
| "completions/mean_length": 249.09375, | |
| "completions/mean_terminated_length": 249.09375, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 58.0, | |
| "grad_norm": 0.00013837260485161096, | |
| "kl": 0.018972176825627685, | |
| "learning_rate": 2.9435178996510456e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 4015496.0, | |
| "reward": 0.6081739664077759, | |
| "reward_std": 0.06578633934259415, | |
| "rewards/reward_matching": 0.49933409690856934, | |
| "rewards/reward_object_count": 0.5428677797317505, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 764.0, | |
| "completions/mean_length": 206.546875, | |
| "completions/mean_terminated_length": 180.17741935483872, | |
| "completions/min_length": 20.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 58.5, | |
| "grad_norm": 0.0002509634068701416, | |
| "kl": 0.02328762272372842, | |
| "learning_rate": 2.9421897444138902e-05, | |
| "loss": -0.0, | |
| "num_tokens": 4047979.0, | |
| "reward": 0.6523654460906982, | |
| "reward_std": 0.09278056025505066, | |
| "rewards/reward_matching": 0.5031481981277466, | |
| "rewards/reward_object_count": 0.7680073976516724, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 577.0, | |
| "completions/max_terminated_length": 577.0, | |
| "completions/mean_length": 121.125, | |
| "completions/mean_terminated_length": 121.125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 59.0, | |
| "grad_norm": 7.174992060754448e-05, | |
| "kl": 0.02193958149291575, | |
| "learning_rate": 2.9408464612887484e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4075955.0, | |
| "reward": 0.6293376088142395, | |
| "reward_std": 0.03621161729097366, | |
| "rewards/reward_matching": 0.5260950922966003, | |
| "rewards/reward_object_count": 0.5684027671813965, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 964.0, | |
| "completions/mean_length": 308.25, | |
| "completions/mean_terminated_length": 260.53333333333336, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 59.5, | |
| "grad_norm": 0.00014954354264773428, | |
| "kl": 0.019514269777573645, | |
| "learning_rate": 2.9394880643660242e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 4120067.0, | |
| "reward": 0.664871096611023, | |
| "reward_std": 0.0682295560836792, | |
| "rewards/reward_matching": 0.5886327624320984, | |
| "rewards/reward_object_count": 0.57408207654953, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1000.0, | |
| "completions/mean_length": 254.0625, | |
| "completions/mean_terminated_length": 229.2258064516129, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "epoch": 60.0, | |
| "grad_norm": 0.0005555336247198284, | |
| "kl": 0.06151175429113209, | |
| "learning_rate": 2.938114567894659e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 4153351.0, | |
| "reward": 0.49984338879585266, | |
| "reward_std": 0.08151112496852875, | |
| "rewards/reward_matching": 0.3786693215370178, | |
| "rewards/reward_object_count": 0.3944591283798218, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 709.0, | |
| "completions/mean_length": 302.734375, | |
| "completions/mean_terminated_length": 279.46774193548384, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 60.5, | |
| "grad_norm": 0.00010743723396444693, | |
| "kl": 0.017743419273756444, | |
| "learning_rate": 2.9367259862819805e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 4190070.0, | |
| "reward": 0.5756763219833374, | |
| "reward_std": 0.05204359441995621, | |
| "rewards/reward_matching": 0.43783289194107056, | |
| "rewards/reward_object_count": 0.5648829936981201, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 159.0, | |
| "completions/max_terminated_length": 159.0, | |
| "completions/mean_length": 71.453125, | |
| "completions/mean_terminated_length": 71.453125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 61.0, | |
| "grad_norm": 7.059347990434617e-05, | |
| "kl": 0.02535859332419932, | |
| "learning_rate": 2.9353223340935533e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 4212627.0, | |
| "reward": 0.6173241138458252, | |
| "reward_std": 0.045353930443525314, | |
| "rewards/reward_matching": 0.50152987241745, | |
| "rewards/reward_object_count": 0.58203125, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 235.0, | |
| "completions/max_terminated_length": 235.0, | |
| "completions/mean_length": 105.625, | |
| "completions/mean_terminated_length": 105.625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 61.5, | |
| "grad_norm": 7.782715692883357e-05, | |
| "kl": 0.02937923581339419, | |
| "learning_rate": 2.933903626053024e-05, | |
| "loss": -0.0, | |
| "num_tokens": 4237051.0, | |
| "reward": 0.6152711510658264, | |
| "reward_std": 0.04115668684244156, | |
| "rewards/reward_matching": 0.5210062265396118, | |
| "rewards/reward_object_count": 0.5133370757102966, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 995.0, | |
| "completions/mean_length": 237.28125, | |
| "completions/mean_terminated_length": 198.59016393442624, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 62.0, | |
| "grad_norm": 0.0003079625021200627, | |
| "kl": 0.03858275443781167, | |
| "learning_rate": 2.932469877041969e-05, | |
| "loss": -0.0022, | |
| "num_tokens": 4273101.0, | |
| "reward": 0.5917791128158569, | |
| "reward_std": 0.07503822445869446, | |
| "rewards/reward_matching": 0.49250179529190063, | |
| "rewards/reward_object_count": 0.5282653570175171, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 943.0, | |
| "completions/max_terminated_length": 943.0, | |
| "completions/mean_length": 147.421875, | |
| "completions/mean_terminated_length": 147.421875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 62.5, | |
| "grad_norm": 0.0001133783589466475, | |
| "kl": 0.021977555472403765, | |
| "learning_rate": 2.931021102099737e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4300520.0, | |
| "reward": 0.7027174234390259, | |
| "reward_std": 0.05097541958093643, | |
| "rewards/reward_matching": 0.5685935616493225, | |
| "rewards/reward_object_count": 0.8078063130378723, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 273.0, | |
| "completions/max_terminated_length": 273.0, | |
| "completions/mean_length": 80.84375, | |
| "completions/mean_terminated_length": 80.84375, | |
| "completions/min_length": 12.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 63.0, | |
| "grad_norm": 9.239943028660491e-05, | |
| "kl": 0.04109277273528278, | |
| "learning_rate": 2.9295573164232913e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 4322718.0, | |
| "reward": 0.5727678537368774, | |
| "reward_std": 0.06367385387420654, | |
| "rewards/reward_matching": 0.44185274839401245, | |
| "rewards/reward_object_count": 0.553906261920929, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 540.0, | |
| "completions/max_terminated_length": 540.0, | |
| "completions/mean_length": 165.65625, | |
| "completions/mean_terminated_length": 165.65625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 63.5, | |
| "grad_norm": 8.721143240109086e-05, | |
| "kl": 0.02322767348960042, | |
| "learning_rate": 2.9280785353670514e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4355464.0, | |
| "reward": 0.7161321640014648, | |
| "reward_std": 0.0398261621594429, | |
| "rewards/reward_matching": 0.6192046403884888, | |
| "rewards/reward_object_count": 0.7230468988418579, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 121.0, | |
| "completions/max_terminated_length": 121.0, | |
| "completions/mean_length": 54.1875, | |
| "completions/mean_terminated_length": 54.1875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 64.0, | |
| "grad_norm": 9.180100460071117e-05, | |
| "kl": 0.030635279836133122, | |
| "learning_rate": 2.9265847744427305e-05, | |
| "loss": 0.0, | |
| "num_tokens": 4377236.0, | |
| "reward": 0.7174146771430969, | |
| "reward_std": 0.06405159085988998, | |
| "rewards/reward_matching": 0.5802397727966309, | |
| "rewards/reward_object_count": 0.8463541865348816, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 273.0, | |
| "completions/max_terminated_length": 273.0, | |
| "completions/mean_length": 132.71875, | |
| "completions/mean_terminated_length": 132.71875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 64.5, | |
| "grad_norm": 0.00010723716695792973, | |
| "kl": 0.024373686173930764, | |
| "learning_rate": 2.925076049319174e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4405954.0, | |
| "reward": 0.6944972276687622, | |
| "reward_std": 0.06675916910171509, | |
| "rewards/reward_matching": 0.543346107006073, | |
| "rewards/reward_object_count": 0.8424479365348816, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 667.0, | |
| "completions/mean_length": 165.234375, | |
| "completions/mean_terminated_length": 151.6031746031746, | |
| "completions/min_length": 39.0, | |
| "completions/min_terminated_length": 39.0, | |
| "epoch": 65.0, | |
| "grad_norm": 8.877630898496136e-05, | |
| "kl": 0.017184904776513577, | |
| "learning_rate": 2.9235523758221944e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 4434193.0, | |
| "reward": 0.5842474699020386, | |
| "reward_std": 0.059734977781772614, | |
| "rewards/reward_matching": 0.47713083028793335, | |
| "rewards/reward_object_count": 0.4898448884487152, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 615.0, | |
| "completions/max_terminated_length": 615.0, | |
| "completions/mean_length": 151.40625, | |
| "completions/mean_terminated_length": 151.40625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 65.5, | |
| "grad_norm": 0.00013446353841573, | |
| "kl": 0.02186008053831756, | |
| "learning_rate": 2.922013769934406e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4461547.0, | |
| "reward": 0.610063910484314, | |
| "reward_std": 0.07120901346206665, | |
| "rewards/reward_matching": 0.43639126420021057, | |
| "rewards/reward_object_count": 0.7411458492279053, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 205.0, | |
| "completions/mean_length": 88.21875, | |
| "completions/mean_terminated_length": 73.36507936507937, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 66.0, | |
| "grad_norm": 5.741886707255617e-05, | |
| "kl": 0.023789451690390706, | |
| "learning_rate": 2.920460247795056e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4485177.0, | |
| "reward": 0.6123472452163696, | |
| "reward_std": 0.04124218225479126, | |
| "rewards/reward_matching": 0.4969410002231598, | |
| "rewards/reward_object_count": 0.5709134936332703, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.109375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 799.0, | |
| "completions/mean_length": 361.5, | |
| "completions/mean_terminated_length": 280.140350877193, | |
| "completions/min_length": 11.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 66.5, | |
| "grad_norm": 0.0001969350705621764, | |
| "kl": 0.031420703046023846, | |
| "learning_rate": 2.918891825699857e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 4527897.0, | |
| "reward": 0.5840119123458862, | |
| "reward_std": 0.09337732195854187, | |
| "rewards/reward_matching": 0.4611853361129761, | |
| "rewards/reward_object_count": 0.5521284341812134, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 700.0, | |
| "completions/mean_length": 173.21875, | |
| "completions/mean_terminated_length": 159.71428571428572, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 67.0, | |
| "grad_norm": 0.0017126374877989292, | |
| "kl": 0.061008882825262845, | |
| "learning_rate": 2.9173085201008144e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 4557607.0, | |
| "reward": 0.5862646698951721, | |
| "reward_std": 0.09075936675071716, | |
| "rewards/reward_matching": 0.46519267559051514, | |
| "rewards/reward_object_count": 0.5669952630996704, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 539.0, | |
| "completions/max_terminated_length": 539.0, | |
| "completions/mean_length": 131.765625, | |
| "completions/mean_terminated_length": 131.765625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 67.5, | |
| "grad_norm": 7.290254143299535e-05, | |
| "kl": 0.022246332373470068, | |
| "learning_rate": 2.9157103476060547e-05, | |
| "loss": 0.0, | |
| "num_tokens": 4585944.0, | |
| "reward": 0.7023236155509949, | |
| "reward_std": 0.04230645298957825, | |
| "rewards/reward_matching": 0.5999230146408081, | |
| "rewards/reward_object_count": 0.7118489742279053, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 766.0, | |
| "completions/mean_length": 188.296875, | |
| "completions/mean_terminated_length": 175.03174603174602, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "epoch": 68.0, | |
| "grad_norm": 0.0003481461899355054, | |
| "kl": 0.04150618601124734, | |
| "learning_rate": 2.914097324979651e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 4616619.0, | |
| "reward": 0.5926013588905334, | |
| "reward_std": 0.08262215554714203, | |
| "rewards/reward_matching": 0.4751903712749481, | |
| "rewards/reward_object_count": 0.56868577003479, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 463.0, | |
| "completions/mean_length": 159.109375, | |
| "completions/mean_terminated_length": 145.38095238095238, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 68.5, | |
| "grad_norm": 0.0001742025197017938, | |
| "kl": 0.020869133877567947, | |
| "learning_rate": 2.9124694691414485e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 4648946.0, | |
| "reward": 0.7437945008277893, | |
| "reward_std": 0.036315254867076874, | |
| "rewards/reward_matching": 0.6901907920837402, | |
| "rewards/reward_object_count": 0.6484003067016602, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 665.0, | |
| "completions/mean_length": 190.71875, | |
| "completions/mean_terminated_length": 177.4920634920635, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 69.0, | |
| "grad_norm": 0.00010955618199659511, | |
| "kl": 0.015927789616398513, | |
| "learning_rate": 2.9108267971668828e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 4680416.0, | |
| "reward": 0.5738010406494141, | |
| "reward_std": 0.057256221771240234, | |
| "rewards/reward_matching": 0.44618654251098633, | |
| "rewards/reward_object_count": 0.530445396900177, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 238.0, | |
| "completions/max_terminated_length": 238.0, | |
| "completions/mean_length": 101.78125, | |
| "completions/mean_terminated_length": 101.78125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 69.5, | |
| "grad_norm": 9.076119749806821e-05, | |
| "kl": 0.025645660003647208, | |
| "learning_rate": 2.909169326286807e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 4709394.0, | |
| "reward": 0.7509521245956421, | |
| "reward_std": 0.05357357859611511, | |
| "rewards/reward_matching": 0.6316216588020325, | |
| "rewards/reward_object_count": 0.8598958253860474, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 919.0, | |
| "completions/max_terminated_length": 919.0, | |
| "completions/mean_length": 174.90625, | |
| "completions/mean_terminated_length": 174.90625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 70.0, | |
| "grad_norm": 6.338445382425562e-05, | |
| "kl": 0.015905980253592134, | |
| "learning_rate": 2.9074970738873054e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 4738892.0, | |
| "reward": 0.6288744807243347, | |
| "reward_std": 0.058405984193086624, | |
| "rewards/reward_matching": 0.4976961612701416, | |
| "rewards/reward_object_count": 0.6512840986251831, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 273.0, | |
| "completions/mean_length": 116.984375, | |
| "completions/mean_terminated_length": 102.58730158730158, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 70.5, | |
| "grad_norm": 9.006184700410813e-05, | |
| "kl": 0.02571859711315483, | |
| "learning_rate": 2.9058100575095156e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 4764363.0, | |
| "reward": 0.5978801250457764, | |
| "reward_std": 0.056724559515714645, | |
| "rewards/reward_matching": 0.49124425649642944, | |
| "rewards/reward_object_count": 0.5156679153442383, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 994.0, | |
| "completions/mean_length": 309.84375, | |
| "completions/mean_terminated_length": 262.23333333333335, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "epoch": 71.0, | |
| "grad_norm": 0.0003815116360783577, | |
| "kl": 0.029195085866376758, | |
| "learning_rate": 2.90410829484944e-05, | |
| "loss": -0.0006, | |
| "num_tokens": 4806337.0, | |
| "reward": 0.6275303363800049, | |
| "reward_std": 0.11912961304187775, | |
| "rewards/reward_matching": 0.5174634456634521, | |
| "rewards/reward_object_count": 0.6165112257003784, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 463.0, | |
| "completions/max_terminated_length": 463.0, | |
| "completions/mean_length": 104.703125, | |
| "completions/mean_terminated_length": 104.703125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 71.5, | |
| "grad_norm": 9.876031981548294e-05, | |
| "kl": 0.02460621646605432, | |
| "learning_rate": 2.902391803757764e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4834542.0, | |
| "reward": 0.6761475205421448, | |
| "reward_std": 0.047544464468955994, | |
| "rewards/reward_matching": 0.5909047722816467, | |
| "rewards/reward_object_count": 0.6080232858657837, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 501.0, | |
| "completions/max_terminated_length": 501.0, | |
| "completions/mean_length": 85.65625, | |
| "completions/mean_terminated_length": 85.65625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 72.0, | |
| "grad_norm": 9.556031000101939e-05, | |
| "kl": 0.02564686187542975, | |
| "learning_rate": 2.900660602239667e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 4857368.0, | |
| "reward": 0.650336742401123, | |
| "reward_std": 0.06377670913934708, | |
| "rewards/reward_matching": 0.5044280290603638, | |
| "rewards/reward_object_count": 0.7383996248245239, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.078125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 969.0, | |
| "completions/mean_length": 297.65625, | |
| "completions/mean_terminated_length": 236.10169491525423, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 72.5, | |
| "grad_norm": 0.00020726142975036055, | |
| "kl": 0.026772375334985554, | |
| "learning_rate": 2.8989147084546335e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 4896322.0, | |
| "reward": 0.6142957210540771, | |
| "reward_std": 0.06263671815395355, | |
| "rewards/reward_matching": 0.5525070428848267, | |
| "rewards/reward_object_count": 0.4295823872089386, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 501.0, | |
| "completions/max_terminated_length": 501.0, | |
| "completions/mean_length": 152.96875, | |
| "completions/mean_terminated_length": 152.96875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 73.0, | |
| "grad_norm": 7.541166269220412e-05, | |
| "kl": 0.018190920585766435, | |
| "learning_rate": 2.8971541407162637e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 4926016.0, | |
| "reward": 0.6451115608215332, | |
| "reward_std": 0.041765011847019196, | |
| "rewards/reward_matching": 0.5400669574737549, | |
| "rewards/reward_object_count": 0.6053571701049805, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 387.0, | |
| "completions/mean_length": 177.90625, | |
| "completions/mean_terminated_length": 150.61290322580646, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "epoch": 73.5, | |
| "grad_norm": 0.00026374394656158984, | |
| "kl": 0.028221046086400747, | |
| "learning_rate": 2.8953789174920795e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 4958266.0, | |
| "reward": 0.6425023078918457, | |
| "reward_std": 0.08397432416677475, | |
| "rewards/reward_matching": 0.5445351600646973, | |
| "rewards/reward_object_count": 0.5945312976837158, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 995.0, | |
| "completions/mean_length": 166.0625, | |
| "completions/mean_terminated_length": 152.44444444444446, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 74.0, | |
| "grad_norm": 0.00026537227677181363, | |
| "kl": 0.03987942379899323, | |
| "learning_rate": 2.8935890574033325e-05, | |
| "loss": -0.0017, | |
| "num_tokens": 4987198.0, | |
| "reward": 0.6173149347305298, | |
| "reward_std": 0.08371478319168091, | |
| "rewards/reward_matching": 0.48869776725769043, | |
| "rewards/reward_object_count": 0.667356550693512, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 237.0, | |
| "completions/max_terminated_length": 237.0, | |
| "completions/mean_length": 91.21875, | |
| "completions/mean_terminated_length": 91.21875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 74.5, | |
| "grad_norm": 0.00011484589049359784, | |
| "kl": 0.02938173897564411, | |
| "learning_rate": 2.8917845792248085e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 5014860.0, | |
| "reward": 0.7492313981056213, | |
| "reward_std": 0.06530742347240448, | |
| "rewards/reward_matching": 0.6417745351791382, | |
| "rewards/reward_object_count": 0.8208333253860474, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 733.0, | |
| "completions/mean_length": 258.140625, | |
| "completions/mean_terminated_length": 233.43548387096774, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 75.0, | |
| "grad_norm": 0.0001241009886143729, | |
| "kl": 0.014125383459031582, | |
| "learning_rate": 2.8899655018846297e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 5050325.0, | |
| "reward": 0.6234834790229797, | |
| "reward_std": 0.05080155283212662, | |
| "rewards/reward_matching": 0.5106083750724792, | |
| "rewards/reward_object_count": 0.5855922698974609, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 963.0, | |
| "completions/mean_length": 316.859375, | |
| "completions/mean_terminated_length": 269.71666666666664, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 75.5, | |
| "grad_norm": 0.000219573121285066, | |
| "kl": 0.019302582019008696, | |
| "learning_rate": 2.8881318444640564e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 5090508.0, | |
| "reward": 0.6606755256652832, | |
| "reward_std": 0.08510918915271759, | |
| "rewards/reward_matching": 0.5664112567901611, | |
| "rewards/reward_object_count": 0.6197687387466431, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 970.0, | |
| "completions/mean_length": 276.375, | |
| "completions/mean_terminated_length": 252.25806451612902, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "epoch": 76.0, | |
| "grad_norm": 0.00031149154528975487, | |
| "kl": 0.029386045061983168, | |
| "learning_rate": 2.8862836261972873e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 5125540.0, | |
| "reward": 0.5237194299697876, | |
| "reward_std": 0.09430050849914551, | |
| "rewards/reward_matching": 0.39478474855422974, | |
| "rewards/reward_object_count": 0.4654930830001831, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 271.0, | |
| "completions/max_terminated_length": 271.0, | |
| "completions/mean_length": 124.234375, | |
| "completions/mean_terminated_length": 124.234375, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 76.5, | |
| "grad_norm": 0.00011776048631872982, | |
| "kl": 0.03456262964755297, | |
| "learning_rate": 2.8844208664712577e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 5151795.0, | |
| "reward": 0.6158008575439453, | |
| "reward_std": 0.07029575109481812, | |
| "rewards/reward_matching": 0.5279592275619507, | |
| "rewards/reward_object_count": 0.510751485824585, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 121.0, | |
| "completions/max_terminated_length": 121.0, | |
| "completions/mean_length": 54.828125, | |
| "completions/mean_terminated_length": 54.828125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 77.0, | |
| "grad_norm": 8.856238127918914e-05, | |
| "kl": 0.02841056394390762, | |
| "learning_rate": 2.882543584825435e-05, | |
| "loss": 0.0, | |
| "num_tokens": 5175208.0, | |
| "reward": 0.7693890333175659, | |
| "reward_std": 0.05110414698719978, | |
| "rewards/reward_matching": 0.6547108888626099, | |
| "rewards/reward_object_count": 0.8828125, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 964.0, | |
| "completions/mean_length": 310.921875, | |
| "completions/mean_terminated_length": 263.3833333333333, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "epoch": 77.5, | |
| "grad_norm": 0.0002557071566116065, | |
| "kl": 0.022015991620719433, | |
| "learning_rate": 2.880651800951616e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 5219171.0, | |
| "reward": 0.6839065551757812, | |
| "reward_std": 0.06827011704444885, | |
| "rewards/reward_matching": 0.6086301803588867, | |
| "rewards/reward_object_count": 0.6092674732208252, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 964.0, | |
| "completions/mean_length": 202.609375, | |
| "completions/mean_terminated_length": 176.11290322580646, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 78.0, | |
| "grad_norm": 8.753160363994539e-05, | |
| "kl": 0.018119822721928358, | |
| "learning_rate": 2.8787455346937182e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 5250442.0, | |
| "reward": 0.6155011653900146, | |
| "reward_std": 0.04511785879731178, | |
| "rewards/reward_matching": 0.4806468188762665, | |
| "rewards/reward_object_count": 0.6355655193328857, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 997.0, | |
| "completions/mean_length": 287.390625, | |
| "completions/mean_terminated_length": 263.6290322580645, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 78.5, | |
| "grad_norm": 0.00010502748773433268, | |
| "kl": 0.015278441365808249, | |
| "learning_rate": 2.876824806047573e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 5288099.0, | |
| "reward": 0.5914611220359802, | |
| "reward_std": 0.04948745667934418, | |
| "rewards/reward_matching": 0.48842769861221313, | |
| "rewards/reward_object_count": 0.4920225143432617, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 577.0, | |
| "completions/max_terminated_length": 577.0, | |
| "completions/mean_length": 222.75, | |
| "completions/mean_terminated_length": 222.75, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "epoch": 79.0, | |
| "grad_norm": 0.0001686933246674016, | |
| "kl": 0.028436586260795593, | |
| "learning_rate": 2.8748896351607145e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 5320979.0, | |
| "reward": 0.6137920618057251, | |
| "reward_std": 0.08775545656681061, | |
| "rewards/reward_matching": 0.5258926153182983, | |
| "rewards/reward_object_count": 0.5069072246551514, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.078125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1021.0, | |
| "completions/mean_length": 389.984375, | |
| "completions/mean_terminated_length": 336.2542372881356, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 79.5, | |
| "grad_norm": 0.00013628082524519414, | |
| "kl": 0.013047700515016913, | |
| "learning_rate": 2.8729400423321693e-05, | |
| "loss": 0.0013, | |
| "num_tokens": 5362642.0, | |
| "reward": 0.5530112385749817, | |
| "reward_std": 0.069613516330719, | |
| "rewards/reward_matching": 0.4320271611213684, | |
| "rewards/reward_object_count": 0.4689747095108032, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 205.0, | |
| "completions/mean_length": 64.96875, | |
| "completions/mean_terminated_length": 49.74603174603175, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 80.0, | |
| "grad_norm": 0.00011326887761242688, | |
| "kl": 0.03361499134916812, | |
| "learning_rate": 2.8709760480122443e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 5388944.0, | |
| "reward": 0.7502645254135132, | |
| "reward_std": 0.04951602220535278, | |
| "rewards/reward_matching": 0.6452133059501648, | |
| "rewards/reward_object_count": 0.8156828880310059, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 273.0, | |
| "completions/mean_length": 170.109375, | |
| "completions/mean_terminated_length": 156.55555555555554, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 80.5, | |
| "grad_norm": 0.00018552214896772057, | |
| "kl": 0.02882540924474597, | |
| "learning_rate": 2.8689976728023103e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 5421015.0, | |
| "reward": 0.6651661992073059, | |
| "reward_std": 0.072138212621212, | |
| "rewards/reward_matching": 0.5593208074569702, | |
| "rewards/reward_object_count": 0.6478685140609741, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 501.0, | |
| "completions/max_terminated_length": 501.0, | |
| "completions/mean_length": 183.34375, | |
| "completions/mean_terminated_length": 183.34375, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "epoch": 81.0, | |
| "grad_norm": 0.00038360359030775726, | |
| "kl": 0.06057113886345178, | |
| "learning_rate": 2.8670049374545873e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 5452013.0, | |
| "reward": 0.6705402135848999, | |
| "reward_std": 0.06918413937091827, | |
| "rewards/reward_matching": 0.5548437833786011, | |
| "rewards/reward_object_count": 0.7037945985794067, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 995.0, | |
| "completions/mean_length": 385.5, | |
| "completions/mean_terminated_length": 364.9032258064516, | |
| "completions/min_length": 83.0, | |
| "completions/min_terminated_length": 83.0, | |
| "epoch": 81.5, | |
| "grad_norm": 0.00013938083429820836, | |
| "kl": 0.012367542018182576, | |
| "learning_rate": 2.8649978628719256e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 5495309.0, | |
| "reward": 0.5702022314071655, | |
| "reward_std": 0.05095814913511276, | |
| "rewards/reward_matching": 0.48750579357147217, | |
| "rewards/reward_object_count": 0.38849371671676636, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 425.0, | |
| "completions/max_terminated_length": 425.0, | |
| "completions/mean_length": 141.015625, | |
| "completions/mean_terminated_length": 141.015625, | |
| "completions/min_length": 9.0, | |
| "completions/min_terminated_length": 9.0, | |
| "epoch": 82.0, | |
| "grad_norm": 8.968032489065081e-05, | |
| "kl": 0.044206105871126056, | |
| "learning_rate": 2.8629764701075885e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 5521998.0, | |
| "reward": 0.6258660554885864, | |
| "reward_std": 0.07497484982013702, | |
| "rewards/reward_matching": 0.47830966114997864, | |
| "rewards/reward_object_count": 0.7100260257720947, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1021.0, | |
| "completions/mean_length": 354.9375, | |
| "completions/mean_terminated_length": 285.7241379310345, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 82.5, | |
| "grad_norm": 0.00014277624723035842, | |
| "kl": 0.012639820342883468, | |
| "learning_rate": 2.8609407803650295e-05, | |
| "loss": 0.0009, | |
| "num_tokens": 5564298.0, | |
| "reward": 0.604388952255249, | |
| "reward_std": 0.036000728607177734, | |
| "rewards/reward_matching": 0.5304588079452515, | |
| "rewards/reward_object_count": 0.43056821823120117, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 633.0, | |
| "completions/mean_length": 265.09375, | |
| "completions/mean_terminated_length": 240.61290322580646, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 83.0, | |
| "grad_norm": 0.0005736637976951897, | |
| "kl": 0.04679834772832692, | |
| "learning_rate": 2.8588908149976702e-05, | |
| "loss": -0.0014, | |
| "num_tokens": 5598928.0, | |
| "reward": 0.5598403215408325, | |
| "reward_std": 0.11772333085536957, | |
| "rewards/reward_matching": 0.47532498836517334, | |
| "rewards/reward_object_count": 0.43572670221328735, | |
| "rewards/reward_parseable": 0.9375, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 997.0, | |
| "completions/mean_length": 235.6875, | |
| "completions/mean_terminated_length": 223.17460317460316, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 83.5, | |
| "grad_norm": 0.00011323333455948159, | |
| "kl": 0.02139199187513441, | |
| "learning_rate": 2.856826595508678e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 5636156.0, | |
| "reward": 0.6084253191947937, | |
| "reward_std": 0.04125886410474777, | |
| "rewards/reward_matching": 0.48200953006744385, | |
| "rewards/reward_object_count": 0.5960979461669922, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 957.0, | |
| "completions/mean_length": 277.84375, | |
| "completions/mean_terminated_length": 241.14754098360655, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 84.0, | |
| "grad_norm": 0.00011925779836019501, | |
| "kl": 0.013627393753267825, | |
| "learning_rate": 2.8547481435507382e-05, | |
| "loss": 0.0012, | |
| "num_tokens": 5674162.0, | |
| "reward": 0.6911174654960632, | |
| "reward_std": 0.04913552850484848, | |
| "rewards/reward_matching": 0.5930126905441284, | |
| "rewards/reward_object_count": 0.6765491366386414, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 539.0, | |
| "completions/max_terminated_length": 539.0, | |
| "completions/mean_length": 159.125, | |
| "completions/mean_terminated_length": 159.125, | |
| "completions/min_length": 12.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 84.5, | |
| "grad_norm": 0.00013656335067935288, | |
| "kl": 0.03119825676549226, | |
| "learning_rate": 2.852655480925828e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 5702330.0, | |
| "reward": 0.5503749251365662, | |
| "reward_std": 0.07836627960205078, | |
| "rewards/reward_matching": 0.3975270688533783, | |
| "rewards/reward_object_count": 0.5749184489250183, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 865.0, | |
| "completions/mean_length": 285.15625, | |
| "completions/mean_terminated_length": 235.9, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 85.0, | |
| "grad_norm": 0.0001269476197194308, | |
| "kl": 0.015716996625997126, | |
| "learning_rate": 2.8505486295849884e-05, | |
| "loss": 0.0009, | |
| "num_tokens": 5737924.0, | |
| "reward": 0.5956513285636902, | |
| "reward_std": 0.05721241980791092, | |
| "rewards/reward_matching": 0.495978981256485, | |
| "rewards/reward_object_count": 0.49031955003738403, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 235.0, | |
| "completions/mean_length": 167.46875, | |
| "completions/mean_terminated_length": 125.34426229508196, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 85.5, | |
| "grad_norm": 0.00018915600958280265, | |
| "kl": 0.032580646337009966, | |
| "learning_rate": 2.848427611628093e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 5766946.0, | |
| "reward": 0.5924456119537354, | |
| "reward_std": 0.06654588133096695, | |
| "rewards/reward_matching": 0.5018090009689331, | |
| "rewards/reward_object_count": 0.4724262058734894, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 997.0, | |
| "completions/mean_length": 231.484375, | |
| "completions/mean_terminated_length": 192.50819672131146, | |
| "completions/min_length": 11.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 86.0, | |
| "grad_norm": 0.00022493403230328113, | |
| "kl": 0.02257319202180952, | |
| "learning_rate": 2.8462924493036168e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 5806145.0, | |
| "reward": 0.7301434278488159, | |
| "reward_std": 0.07324472069740295, | |
| "rewards/reward_matching": 0.6351655721664429, | |
| "rewards/reward_object_count": 0.7608456611633301, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 971.0, | |
| "completions/max_terminated_length": 971.0, | |
| "completions/mean_length": 316.921875, | |
| "completions/mean_terminated_length": 316.921875, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 86.5, | |
| "grad_norm": 0.00020160213171038777, | |
| "kl": 0.018056653905659914, | |
| "learning_rate": 2.8441431650084018e-05, | |
| "loss": 0.001, | |
| "num_tokens": 5843452.0, | |
| "reward": 0.565083920955658, | |
| "reward_std": 0.04591123014688492, | |
| "rewards/reward_matching": 0.5012180805206299, | |
| "rewards/reward_object_count": 0.3217654228210449, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 668.0, | |
| "completions/max_terminated_length": 668.0, | |
| "completions/mean_length": 132.734375, | |
| "completions/mean_terminated_length": 132.734375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 87.0, | |
| "grad_norm": 9.513698751106858e-05, | |
| "kl": 0.019756762427277863, | |
| "learning_rate": 2.841979781287424e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 5874091.0, | |
| "reward": 0.721807599067688, | |
| "reward_std": 0.030880732461810112, | |
| "rewards/reward_matching": 0.632381796836853, | |
| "rewards/reward_object_count": 0.7118923664093018, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 709.0, | |
| "completions/max_terminated_length": 709.0, | |
| "completions/mean_length": 249.890625, | |
| "completions/mean_terminated_length": 249.890625, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 87.5, | |
| "grad_norm": 0.0001513262395747006, | |
| "kl": 0.022078259498812258, | |
| "learning_rate": 2.8398023208335537e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 5907748.0, | |
| "reward": 0.5269720554351807, | |
| "reward_std": 0.05287637189030647, | |
| "rewards/reward_matching": 0.398820161819458, | |
| "rewards/reward_object_count": 0.45402464270591736, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 895.0, | |
| "completions/mean_length": 298.640625, | |
| "completions/mean_terminated_length": 262.9672131147541, | |
| "completions/min_length": 12.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 88.0, | |
| "grad_norm": 0.00025134760653600097, | |
| "kl": 0.04158466309309006, | |
| "learning_rate": 2.8376108064873216e-05, | |
| "loss": -0.0008, | |
| "num_tokens": 5945805.0, | |
| "reward": 0.543440043926239, | |
| "reward_std": 0.06771589815616608, | |
| "rewards/reward_matching": 0.4773101806640625, | |
| "rewards/reward_object_count": 0.316519558429718, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 700.0, | |
| "completions/mean_length": 252.25, | |
| "completions/mean_terminated_length": 240.0, | |
| "completions/min_length": 9.0, | |
| "completions/min_terminated_length": 9.0, | |
| "epoch": 88.5, | |
| "grad_norm": 0.00017161465075332671, | |
| "kl": 0.03505228122230619, | |
| "learning_rate": 2.835405261236676e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 5978333.0, | |
| "reward": 0.6240804195404053, | |
| "reward_std": 0.06789788603782654, | |
| "rewards/reward_matching": 0.5318564772605896, | |
| "rewards/reward_object_count": 0.5404576063156128, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 601.0, | |
| "completions/max_terminated_length": 601.0, | |
| "completions/mean_length": 239.578125, | |
| "completions/mean_terminated_length": 239.578125, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 89.0, | |
| "grad_norm": 0.00016913673607632518, | |
| "kl": 0.02632876578718424, | |
| "learning_rate": 2.833185708216743e-05, | |
| "loss": 0.0, | |
| "num_tokens": 6011650.0, | |
| "reward": 0.5502422451972961, | |
| "reward_std": 0.0771588683128357, | |
| "rewards/reward_matching": 0.41941624879837036, | |
| "rewards/reward_object_count": 0.5085875988006592, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 539.0, | |
| "completions/mean_length": 199.859375, | |
| "completions/mean_terminated_length": 186.77777777777777, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 89.5, | |
| "grad_norm": 0.00014499339158646762, | |
| "kl": 0.024362510768696666, | |
| "learning_rate": 2.8309521707095835e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 6046585.0, | |
| "reward": 0.6444197297096252, | |
| "reward_std": 0.056686967611312866, | |
| "rewards/reward_matching": 0.5137526392936707, | |
| "rewards/reward_object_count": 0.680840790271759, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 712.0, | |
| "completions/mean_length": 214.703125, | |
| "completions/mean_terminated_length": 188.59677419354838, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 90.0, | |
| "grad_norm": 0.00012196839088574052, | |
| "kl": 0.024665123783051968, | |
| "learning_rate": 2.8287046721439487e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 6077030.0, | |
| "reward": 0.5945574045181274, | |
| "reward_std": 0.06549065560102463, | |
| "rewards/reward_matching": 0.5034471750259399, | |
| "rewards/reward_object_count": 0.4624456763267517, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 240.0, | |
| "completions/max_terminated_length": 240.0, | |
| "completions/mean_length": 134.484375, | |
| "completions/mean_terminated_length": 134.484375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 90.5, | |
| "grad_norm": 0.00010401565668871626, | |
| "kl": 0.03254161588847637, | |
| "learning_rate": 2.8264432360950355e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 6106501.0, | |
| "reward": 0.6277846693992615, | |
| "reward_std": 0.04529394954442978, | |
| "rewards/reward_matching": 0.542320966720581, | |
| "rewards/reward_object_count": 0.5119605660438538, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 700.0, | |
| "completions/mean_length": 295.390625, | |
| "completions/mean_terminated_length": 283.8253968253968, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 91.0, | |
| "grad_norm": 0.00018713607278186828, | |
| "kl": 0.030495932791382074, | |
| "learning_rate": 2.8241678862842374e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 6143390.0, | |
| "reward": 0.5975713729858398, | |
| "reward_std": 0.04703337699174881, | |
| "rewards/reward_matching": 0.48192787170410156, | |
| "rewards/reward_object_count": 0.5420730710029602, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 501.0, | |
| "completions/max_terminated_length": 501.0, | |
| "completions/mean_length": 193.671875, | |
| "completions/mean_terminated_length": 193.671875, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 91.5, | |
| "grad_norm": 0.00020340237824711949, | |
| "kl": 0.034934017108753324, | |
| "learning_rate": 2.8218786465788984e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 6175689.0, | |
| "reward": 0.6315833926200867, | |
| "reward_std": 0.10472890734672546, | |
| "rewards/reward_matching": 0.49378255009651184, | |
| "rewards/reward_object_count": 0.6921942830085754, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1020.0, | |
| "completions/mean_length": 293.3125, | |
| "completions/mean_terminated_length": 257.37704918032784, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "epoch": 92.0, | |
| "grad_norm": 0.00030767079442739487, | |
| "kl": 0.02895939163863659, | |
| "learning_rate": 2.8195755409920584e-05, | |
| "loss": 0.0009, | |
| "num_tokens": 6212765.0, | |
| "reward": 0.5491479635238647, | |
| "reward_std": 0.09953216463327408, | |
| "rewards/reward_matching": 0.4275497496128082, | |
| "rewards/reward_object_count": 0.49434059858322144, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 387.0, | |
| "completions/mean_length": 195.203125, | |
| "completions/mean_terminated_length": 168.46774193548387, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 92.5, | |
| "grad_norm": 0.00014441045641433448, | |
| "kl": 0.028225229121744633, | |
| "learning_rate": 2.8172585936822056e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 6245162.0, | |
| "reward": 0.6523048877716064, | |
| "reward_std": 0.06327737867832184, | |
| "rewards/reward_matching": 0.5074872970581055, | |
| "rewards/reward_object_count": 0.739062488079071, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 653.0, | |
| "completions/mean_length": 167.1875, | |
| "completions/mean_terminated_length": 139.5483870967742, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 93.0, | |
| "grad_norm": 0.0001068919082172215, | |
| "kl": 0.039769482566043735, | |
| "learning_rate": 2.814927828953022e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 6273206.0, | |
| "reward": 0.5778319835662842, | |
| "reward_std": 0.0708276629447937, | |
| "rewards/reward_matching": 0.4646483063697815, | |
| "rewards/reward_object_count": 0.5108397603034973, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 845.0, | |
| "completions/mean_length": 151.65625, | |
| "completions/mean_terminated_length": 137.8095238095238, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 93.5, | |
| "grad_norm": 0.00015888724010437727, | |
| "kl": 0.05473130161408335, | |
| "learning_rate": 2.812583271253125e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 6304096.0, | |
| "reward": 0.6830726861953735, | |
| "reward_std": 0.061051130294799805, | |
| "rewards/reward_matching": 0.5923636555671692, | |
| "rewards/reward_object_count": 0.653897225856781, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 995.0, | |
| "completions/mean_length": 263.234375, | |
| "completions/mean_terminated_length": 238.69354838709677, | |
| "completions/min_length": 31.0, | |
| "completions/min_terminated_length": 31.0, | |
| "epoch": 94.0, | |
| "grad_norm": 0.00038123532431200147, | |
| "kl": 0.026985038304701447, | |
| "learning_rate": 2.8102249451758162e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 6340207.0, | |
| "reward": 0.6431405544281006, | |
| "reward_std": 0.07958689332008362, | |
| "rewards/reward_matching": 0.5382211804389954, | |
| "rewards/reward_object_count": 0.6166639924049377, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 387.0, | |
| "completions/max_terminated_length": 387.0, | |
| "completions/mean_length": 102.609375, | |
| "completions/mean_terminated_length": 102.609375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 94.5, | |
| "grad_norm": 9.083108307095245e-05, | |
| "kl": 0.03483374323695898, | |
| "learning_rate": 2.8078528754588207e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 6366358.0, | |
| "reward": 0.6219313144683838, | |
| "reward_std": 0.039522431790828705, | |
| "rewards/reward_matching": 0.5477997064590454, | |
| "rewards/reward_object_count": 0.4662574529647827, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 238.0, | |
| "completions/max_terminated_length": 238.0, | |
| "completions/mean_length": 93.3125, | |
| "completions/mean_terminated_length": 93.3125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 95.0, | |
| "grad_norm": 9.838482219493017e-05, | |
| "kl": 0.038898272439837456, | |
| "learning_rate": 2.805467086984027e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 6391914.0, | |
| "reward": 0.7201097011566162, | |
| "reward_std": 0.07063695788383484, | |
| "rewards/reward_matching": 0.5871620178222656, | |
| "rewards/reward_object_count": 0.839062511920929, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 501.0, | |
| "completions/max_terminated_length": 501.0, | |
| "completions/mean_length": 159.984375, | |
| "completions/mean_terminated_length": 159.984375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 95.5, | |
| "grad_norm": 0.0001392089470755309, | |
| "kl": 0.030272011645138264, | |
| "learning_rate": 2.803067604777227e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 6421417.0, | |
| "reward": 0.650718092918396, | |
| "reward_std": 0.06433884799480438, | |
| "rewards/reward_matching": 0.5119979381561279, | |
| "rewards/reward_object_count": 0.7175967693328857, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 667.0, | |
| "completions/mean_length": 149.171875, | |
| "completions/mean_terminated_length": 135.28571428571428, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 96.0, | |
| "grad_norm": 0.00027154432609677315, | |
| "kl": 0.09678576281294227, | |
| "learning_rate": 2.8006544540078535e-05, | |
| "loss": -0.0018, | |
| "num_tokens": 6453108.0, | |
| "reward": 0.6846904158592224, | |
| "reward_std": 0.0770767480134964, | |
| "rewards/reward_matching": 0.6123154759407043, | |
| "rewards/reward_object_count": 0.633380651473999, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 767.0, | |
| "completions/mean_length": 327.015625, | |
| "completions/mean_terminated_length": 280.55, | |
| "completions/min_length": 11.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 96.5, | |
| "grad_norm": 0.0002469752507749945, | |
| "kl": 0.027959817787632346, | |
| "learning_rate": 2.798227659988717e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 6492981.0, | |
| "reward": 0.6113128662109375, | |
| "reward_std": 0.0816509798169136, | |
| "rewards/reward_matching": 0.5030540823936462, | |
| "rewards/reward_object_count": 0.5630275011062622, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 805.0, | |
| "completions/mean_length": 267.453125, | |
| "completions/mean_terminated_length": 255.44444444444446, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 97.0, | |
| "grad_norm": 0.0006099395686760545, | |
| "kl": 0.06375238881446421, | |
| "learning_rate": 2.7957872481757377e-05, | |
| "loss": -0.0017, | |
| "num_tokens": 6530002.0, | |
| "reward": 0.5870110988616943, | |
| "reward_std": 0.06495887041091919, | |
| "rewards/reward_matching": 0.5122255086898804, | |
| "rewards/reward_object_count": 0.44525402784347534, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 931.0, | |
| "completions/mean_length": 241.21875, | |
| "completions/mean_terminated_length": 228.79365079365078, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 97.5, | |
| "grad_norm": 0.00015396032540593296, | |
| "kl": 0.022518991609103978, | |
| "learning_rate": 2.793333244167681e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 6561504.0, | |
| "reward": 0.6182751655578613, | |
| "reward_std": 0.05381970480084419, | |
| "rewards/reward_matching": 0.5018588304519653, | |
| "rewards/reward_object_count": 0.5857993960380554, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 238.0, | |
| "completions/max_terminated_length": 238.0, | |
| "completions/mean_length": 97.671875, | |
| "completions/mean_terminated_length": 97.671875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 98.0, | |
| "grad_norm": 8.675593562657014e-05, | |
| "kl": 0.03554528998211026, | |
| "learning_rate": 2.790865673705888e-05, | |
| "loss": 0.0, | |
| "num_tokens": 6590219.0, | |
| "reward": 0.7543612122535706, | |
| "reward_std": 0.05268620699644089, | |
| "rewards/reward_matching": 0.6470256447792053, | |
| "rewards/reward_object_count": 0.8307291865348816, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 435.0, | |
| "completions/max_terminated_length": 435.0, | |
| "completions/mean_length": 158.203125, | |
| "completions/mean_terminated_length": 158.203125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 98.5, | |
| "grad_norm": 0.00011117629037471488, | |
| "kl": 0.02265286911278963, | |
| "learning_rate": 2.7883845626740046e-05, | |
| "loss": -0.0, | |
| "num_tokens": 6619288.0, | |
| "reward": 0.6950995326042175, | |
| "reward_std": 0.04912342131137848, | |
| "rewards/reward_matching": 0.5725617408752441, | |
| "rewards/reward_object_count": 0.7578125, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 919.0, | |
| "completions/mean_length": 261.4375, | |
| "completions/mean_terminated_length": 249.33333333333334, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 99.0, | |
| "grad_norm": 0.00015910938964225352, | |
| "kl": 0.01948182564228773, | |
| "learning_rate": 2.7858899370977123e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 6654324.0, | |
| "reward": 0.5254322290420532, | |
| "reward_std": 0.04911264032125473, | |
| "rewards/reward_matching": 0.4140118956565857, | |
| "rewards/reward_object_count": 0.3851252794265747, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 709.0, | |
| "completions/mean_length": 307.6875, | |
| "completions/mean_terminated_length": 284.5806451612903, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 99.5, | |
| "grad_norm": 0.00020870369917247444, | |
| "kl": 0.02964519546367228, | |
| "learning_rate": 2.783381823144452e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 6693920.0, | |
| "reward": 0.5857189893722534, | |
| "reward_std": 0.07470154017210007, | |
| "rewards/reward_matching": 0.5070096254348755, | |
| "rewards/reward_object_count": 0.4231909513473511, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 139.0, | |
| "completions/max_terminated_length": 139.0, | |
| "completions/mean_length": 58.0625, | |
| "completions/mean_terminated_length": 58.0625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 100.0, | |
| "grad_norm": 0.00010931400902336463, | |
| "kl": 0.04551131138578057, | |
| "learning_rate": 2.780860247123153e-05, | |
| "loss": -0.0, | |
| "num_tokens": 6716580.0, | |
| "reward": 0.6769169569015503, | |
| "reward_std": 0.06710667908191681, | |
| "rewards/reward_matching": 0.5183860063552856, | |
| "rewards/reward_object_count": 0.8294271230697632, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 484.0, | |
| "completions/max_terminated_length": 484.0, | |
| "completions/mean_length": 176.65625, | |
| "completions/mean_terminated_length": 176.65625, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 100.5, | |
| "grad_norm": 0.00011694195563904941, | |
| "kl": 0.0218208993319422, | |
| "learning_rate": 2.778325235483954e-05, | |
| "loss": 0.0, | |
| "num_tokens": 6746190.0, | |
| "reward": 0.6182457804679871, | |
| "reward_std": 0.0681074932217598, | |
| "rewards/reward_matching": 0.49204158782958984, | |
| "rewards/reward_object_count": 0.6151041984558105, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 201 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 766.0, | |
| "completions/mean_length": 201.8125, | |
| "completions/mean_terminated_length": 175.29032258064515, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 101.0, | |
| "grad_norm": 9.230217256117612e-05, | |
| "kl": 0.03192295634653419, | |
| "learning_rate": 2.775776814817928e-05, | |
| "loss": -0.0006, | |
| "num_tokens": 6779330.0, | |
| "reward": 0.6676172018051147, | |
| "reward_std": 0.046617139130830765, | |
| "rewards/reward_matching": 0.5877115726470947, | |
| "rewards/reward_object_count": 0.5905760526657104, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 202 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 931.0, | |
| "completions/mean_length": 198.5625, | |
| "completions/mean_terminated_length": 185.46031746031747, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "epoch": 101.5, | |
| "grad_norm": 0.0004894637386314571, | |
| "kl": 0.06391364219598472, | |
| "learning_rate": 2.7732150118568016e-05, | |
| "loss": -0.0009, | |
| "num_tokens": 6813542.0, | |
| "reward": 0.5614990592002869, | |
| "reward_std": 0.10675215721130371, | |
| "rewards/reward_matching": 0.45702359080314636, | |
| "rewards/reward_object_count": 0.46767452359199524, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 203 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 957.0, | |
| "completions/mean_length": 288.671875, | |
| "completions/mean_terminated_length": 264.9516129032258, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 102.0, | |
| "grad_norm": 0.00020041225070599467, | |
| "kl": 0.016702863038517535, | |
| "learning_rate": 2.770639853472676e-05, | |
| "loss": 0.0013, | |
| "num_tokens": 6852241.0, | |
| "reward": 0.6874593496322632, | |
| "reward_std": 0.056772105395793915, | |
| "rewards/reward_matching": 0.5878496170043945, | |
| "rewards/reward_object_count": 0.6737479567527771, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 204 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 997.0, | |
| "completions/mean_length": 274.625, | |
| "completions/mean_terminated_length": 224.66666666666666, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "epoch": 102.5, | |
| "grad_norm": 0.00023115563089959323, | |
| "kl": 0.05850449949502945, | |
| "learning_rate": 2.768051366677744e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 6888761.0, | |
| "reward": 0.6374750733375549, | |
| "reward_std": 0.0849866271018982, | |
| "rewards/reward_matching": 0.5301556587219238, | |
| "rewards/reward_object_count": 0.6125332117080688, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 463.0, | |
| "completions/max_terminated_length": 463.0, | |
| "completions/mean_length": 133.578125, | |
| "completions/mean_terminated_length": 133.578125, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "epoch": 103.0, | |
| "grad_norm": 0.000418533367337659, | |
| "kl": 0.09003371163271368, | |
| "learning_rate": 2.765449578624007e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 6918814.0, | |
| "reward": 0.6837650537490845, | |
| "reward_std": 0.047935061156749725, | |
| "rewards/reward_matching": 0.602238655090332, | |
| "rewards/reward_object_count": 0.6277344226837158, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 206 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.078125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 931.0, | |
| "completions/mean_length": 230.578125, | |
| "completions/mean_terminated_length": 163.33898305084745, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 103.5, | |
| "grad_norm": 0.00041621492709964514, | |
| "kl": 0.07373892690520734, | |
| "learning_rate": 2.7628345166029907e-05, | |
| "loss": -0.0015, | |
| "num_tokens": 6954115.0, | |
| "reward": 0.6255888342857361, | |
| "reward_std": 0.1271369755268097, | |
| "rewards/reward_matching": 0.5147528052330017, | |
| "rewards/reward_object_count": 0.6305608749389648, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 207 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 969.0, | |
| "completions/mean_length": 183.484375, | |
| "completions/mean_terminated_length": 142.14754098360655, | |
| "completions/min_length": 26.0, | |
| "completions/min_terminated_length": 26.0, | |
| "epoch": 104.0, | |
| "grad_norm": 0.0002878991945181042, | |
| "kl": 0.037236066767945886, | |
| "learning_rate": 2.760206208045458e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 6985122.0, | |
| "reward": 0.6342512369155884, | |
| "reward_std": 0.060641780495643616, | |
| "rewards/reward_matching": 0.5054891705513, | |
| "rewards/reward_object_count": 0.6704136729240417, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 729.0, | |
| "completions/max_terminated_length": 729.0, | |
| "completions/mean_length": 157.671875, | |
| "completions/mean_terminated_length": 157.671875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 104.5, | |
| "grad_norm": 0.00016803652397356927, | |
| "kl": 0.03241563029587269, | |
| "learning_rate": 2.7575646805211224e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 7016397.0, | |
| "reward": 0.6910985708236694, | |
| "reward_std": 0.06627818942070007, | |
| "rewards/reward_matching": 0.555336594581604, | |
| "rewards/reward_object_count": 0.7894831895828247, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 209 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 787.0, | |
| "completions/mean_length": 238.03125, | |
| "completions/mean_terminated_length": 199.37704918032787, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 105.0, | |
| "grad_norm": 0.00015113425615709275, | |
| "kl": 0.02445952722337097, | |
| "learning_rate": 2.7549099617383573e-05, | |
| "loss": 0.0009, | |
| "num_tokens": 7048335.0, | |
| "reward": 0.6103414297103882, | |
| "reward_std": 0.058676064014434814, | |
| "rewards/reward_matching": 0.5071147680282593, | |
| "rewards/reward_object_count": 0.5303625464439392, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 748.0, | |
| "completions/mean_length": 279.390625, | |
| "completions/mean_terminated_length": 255.3709677419355, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 105.5, | |
| "grad_norm": 0.00012727123976219445, | |
| "kl": 0.021365185733884573, | |
| "learning_rate": 2.7522420795439067e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 7084840.0, | |
| "reward": 0.5371774435043335, | |
| "reward_std": 0.03956557810306549, | |
| "rewards/reward_matching": 0.4609455466270447, | |
| "rewards/reward_object_count": 0.30305057764053345, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 211 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 634.0, | |
| "completions/mean_length": 214.921875, | |
| "completions/mean_terminated_length": 160.98333333333332, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 106.0, | |
| "grad_norm": 0.00022877000446897, | |
| "kl": 0.07105760963167995, | |
| "learning_rate": 2.7495610619225925e-05, | |
| "loss": -0.001, | |
| "num_tokens": 7116259.0, | |
| "reward": 0.5825880765914917, | |
| "reward_std": 0.10015714913606644, | |
| "rewards/reward_matching": 0.47119754552841187, | |
| "rewards/reward_object_count": 0.5462228059768677, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 212 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 673.0, | |
| "completions/max_terminated_length": 673.0, | |
| "completions/mean_length": 163.0, | |
| "completions/mean_terminated_length": 163.0, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 106.5, | |
| "grad_norm": 0.00011093632929259911, | |
| "kl": 0.025665281806141138, | |
| "learning_rate": 2.746866936997021e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 7147555.0, | |
| "reward": 0.6465635895729065, | |
| "reward_std": 0.03907918184995651, | |
| "rewards/reward_matching": 0.554027795791626, | |
| "rewards/reward_object_count": 0.5707347393035889, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 213 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 403.0, | |
| "completions/mean_length": 118.453125, | |
| "completions/mean_terminated_length": 104.07936507936508, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 107.0, | |
| "grad_norm": 0.00031378481071442366, | |
| "kl": 0.1566272620111704, | |
| "learning_rate": 2.7441597330272874e-05, | |
| "loss": 0.0, | |
| "num_tokens": 7173760.0, | |
| "reward": 0.6559479236602783, | |
| "reward_std": 0.08748061209917068, | |
| "rewards/reward_matching": 0.5535061955451965, | |
| "rewards/reward_object_count": 0.6504712104797363, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 214 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 865.0, | |
| "completions/mean_length": 298.25, | |
| "completions/mean_terminated_length": 274.83870967741933, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "epoch": 107.5, | |
| "grad_norm": 0.00012201828212710097, | |
| "kl": 0.05274133931379765, | |
| "learning_rate": 2.7414394784106812e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 7213072.0, | |
| "reward": 0.6408737897872925, | |
| "reward_std": 0.0489664301276207, | |
| "rewards/reward_matching": 0.5434332489967346, | |
| "rewards/reward_object_count": 0.5896943807601929, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 270.0, | |
| "completions/mean_length": 133.609375, | |
| "completions/mean_terminated_length": 119.47619047619048, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 108.0, | |
| "grad_norm": 0.00017554641817696393, | |
| "kl": 0.0426066645886749, | |
| "learning_rate": 2.7387062016813845e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 7244087.0, | |
| "reward": 0.6718785762786865, | |
| "reward_std": 0.04563286155462265, | |
| "rewards/reward_matching": 0.5966243147850037, | |
| "rewards/reward_object_count": 0.5695203542709351, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 216 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 349.0, | |
| "completions/max_terminated_length": 349.0, | |
| "completions/mean_length": 102.296875, | |
| "completions/mean_terminated_length": 102.296875, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "epoch": 108.5, | |
| "grad_norm": 0.00015638030890841037, | |
| "kl": 0.08242244273424149, | |
| "learning_rate": 2.7359599315101788e-05, | |
| "loss": 0.0, | |
| "num_tokens": 7269898.0, | |
| "reward": 0.6443240642547607, | |
| "reward_std": 0.0721898227930069, | |
| "rewards/reward_matching": 0.5029533505439758, | |
| "rewards/reward_object_count": 0.7283854484558105, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 217 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 930.0, | |
| "completions/mean_length": 269.09375, | |
| "completions/mean_terminated_length": 257.1111111111111, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 109.0, | |
| "grad_norm": 0.00017680412565823644, | |
| "kl": 0.025702679064124823, | |
| "learning_rate": 2.7332006967041373e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 7306384.0, | |
| "reward": 0.593836784362793, | |
| "reward_std": 0.0476648211479187, | |
| "rewards/reward_matching": 0.540346622467041, | |
| "rewards/reward_object_count": 0.3481438159942627, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 218 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 653.0, | |
| "completions/max_terminated_length": 653.0, | |
| "completions/mean_length": 262.984375, | |
| "completions/mean_terminated_length": 262.984375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 109.5, | |
| "grad_norm": 0.00013252743519842625, | |
| "kl": 0.02228926634415984, | |
| "learning_rate": 2.7304285262063274e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 7344719.0, | |
| "reward": 0.6599787473678589, | |
| "reward_std": 0.04485698789358139, | |
| "rewards/reward_matching": 0.5767978429794312, | |
| "rewards/reward_object_count": 0.5695002675056458, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 219 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 235.0, | |
| "completions/max_terminated_length": 235.0, | |
| "completions/mean_length": 82.4375, | |
| "completions/mean_terminated_length": 82.4375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 110.0, | |
| "grad_norm": 9.248981950804591e-05, | |
| "kl": 0.039374147076159716, | |
| "learning_rate": 2.7276434490955074e-05, | |
| "loss": -0.0, | |
| "num_tokens": 7369259.0, | |
| "reward": 0.6650457382202148, | |
| "reward_std": 0.05076444894075394, | |
| "rewards/reward_matching": 0.5281144380569458, | |
| "rewards/reward_object_count": 0.7408854365348816, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 615.0, | |
| "completions/max_terminated_length": 615.0, | |
| "completions/mean_length": 186.984375, | |
| "completions/mean_terminated_length": 186.984375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 110.5, | |
| "grad_norm": 0.00014483708946499974, | |
| "kl": 0.02676176093518734, | |
| "learning_rate": 2.7248454945858164e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 7400490.0, | |
| "reward": 0.6866004467010498, | |
| "reward_std": 0.04909896105527878, | |
| "rewards/reward_matching": 0.5768593549728394, | |
| "rewards/reward_object_count": 0.7024243474006653, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 221 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 667.0, | |
| "completions/mean_length": 211.59375, | |
| "completions/mean_terminated_length": 198.6984126984127, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 111.0, | |
| "grad_norm": 0.0001923592935781926, | |
| "kl": 0.044007426826283336, | |
| "learning_rate": 2.7220346920264743e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 7430096.0, | |
| "reward": 0.5381298661231995, | |
| "reward_std": 0.09278477728366852, | |
| "rewards/reward_matching": 0.4138203263282776, | |
| "rewards/reward_object_count": 0.48043835163116455, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 222 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 539.0, | |
| "completions/mean_length": 254.734375, | |
| "completions/mean_terminated_length": 216.9016393442623, | |
| "completions/min_length": 27.0, | |
| "completions/min_terminated_length": 27.0, | |
| "epoch": 111.5, | |
| "grad_norm": 0.00020335610315669328, | |
| "kl": 0.0452556642703712, | |
| "learning_rate": 2.71921107090147e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 7467263.0, | |
| "reward": 0.6236730813980103, | |
| "reward_std": 0.060264602303504944, | |
| "rewards/reward_matching": 0.5103158950805664, | |
| "rewards/reward_object_count": 0.603042721748352, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 223 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 564.0, | |
| "completions/max_terminated_length": 564.0, | |
| "completions/mean_length": 164.46875, | |
| "completions/mean_terminated_length": 164.46875, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 112.0, | |
| "grad_norm": 0.00010737024422269315, | |
| "kl": 0.041457608342170715, | |
| "learning_rate": 2.7163746608292525e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 7493533.0, | |
| "reward": 0.5919159650802612, | |
| "reward_std": 0.06952238082885742, | |
| "rewards/reward_matching": 0.4673612117767334, | |
| "rewards/reward_object_count": 0.5731213092803955, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 224 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 943.0, | |
| "completions/max_terminated_length": 943.0, | |
| "completions/mean_length": 223.203125, | |
| "completions/mean_terminated_length": 223.203125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 112.5, | |
| "grad_norm": 0.00016135169425979257, | |
| "kl": 0.024177260580472648, | |
| "learning_rate": 2.7135254915624213e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 7525482.0, | |
| "reward": 0.5558507442474365, | |
| "reward_std": 0.05584409832954407, | |
| "rewards/reward_matching": 0.40790149569511414, | |
| "rewards/reward_object_count": 0.5555493831634521, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 653.0, | |
| "completions/mean_length": 254.125, | |
| "completions/mean_terminated_length": 216.2622950819672, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 113.0, | |
| "grad_norm": 0.0002720111224334687, | |
| "kl": 0.05173568660393357, | |
| "learning_rate": 2.710663592987414e-05, | |
| "loss": -0.0018, | |
| "num_tokens": 7560370.0, | |
| "reward": 0.5505021810531616, | |
| "reward_std": 0.09670871496200562, | |
| "rewards/reward_matching": 0.4270167350769043, | |
| "rewards/reward_object_count": 0.5183357000350952, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 226 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 636.0, | |
| "completions/mean_length": 222.921875, | |
| "completions/mean_terminated_length": 210.20634920634922, | |
| "completions/min_length": 9.0, | |
| "completions/min_terminated_length": 9.0, | |
| "epoch": 113.5, | |
| "grad_norm": 0.00038458031485788524, | |
| "kl": 0.0842397476080805, | |
| "learning_rate": 2.7077889951241924e-05, | |
| "loss": -0.0006, | |
| "num_tokens": 7594541.0, | |
| "reward": 0.584170937538147, | |
| "reward_std": 0.09360209852457047, | |
| "rewards/reward_matching": 0.463908314704895, | |
| "rewards/reward_object_count": 0.5603794455528259, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 227 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 241.0, | |
| "completions/max_terminated_length": 241.0, | |
| "completions/mean_length": 99.328125, | |
| "completions/mean_terminated_length": 99.328125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 114.0, | |
| "grad_norm": 8.975666423793882e-05, | |
| "kl": 0.04190274514257908, | |
| "learning_rate": 2.704901728125928e-05, | |
| "loss": -0.0, | |
| "num_tokens": 7620802.0, | |
| "reward": 0.6767340302467346, | |
| "reward_std": 0.034747164696455, | |
| "rewards/reward_matching": 0.619060754776001, | |
| "rewards/reward_object_count": 0.5264881253242493, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 228 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 969.0, | |
| "completions/mean_length": 229.8125, | |
| "completions/mean_terminated_length": 204.19354838709677, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 114.5, | |
| "grad_norm": 0.00017571848002262414, | |
| "kl": 0.02555367280729115, | |
| "learning_rate": 2.702001822278685e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 7653814.0, | |
| "reward": 0.650544285774231, | |
| "reward_std": 0.07043524831533432, | |
| "rewards/reward_matching": 0.5434165596961975, | |
| "rewards/reward_object_count": 0.6224716901779175, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 229 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 601.0, | |
| "completions/mean_length": 165.8125, | |
| "completions/mean_terminated_length": 152.1904761904762, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 115.0, | |
| "grad_norm": 0.0004399629251565784, | |
| "kl": 0.1373476954177022, | |
| "learning_rate": 2.699089308001104e-05, | |
| "loss": -0.0014, | |
| "num_tokens": 7684970.0, | |
| "reward": 0.5809732675552368, | |
| "reward_std": 0.05775413662195206, | |
| "rewards/reward_matching": 0.5023462772369385, | |
| "rewards/reward_object_count": 0.4447025954723358, | |
| "rewards/reward_parseable": 0.953125, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1006.0, | |
| "completions/mean_length": 263.953125, | |
| "completions/mean_terminated_length": 226.5737704918033, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 115.5, | |
| "grad_norm": 0.00015429549966938794, | |
| "kl": 0.022524354048073292, | |
| "learning_rate": 2.696164215844081e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 7720167.0, | |
| "reward": 0.6703872084617615, | |
| "reward_std": 0.03771934285759926, | |
| "rewards/reward_matching": 0.5701560974121094, | |
| "rewards/reward_object_count": 0.6414677500724792, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 231 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 947.0, | |
| "completions/max_terminated_length": 947.0, | |
| "completions/mean_length": 259.734375, | |
| "completions/mean_terminated_length": 259.734375, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 116.0, | |
| "grad_norm": 0.0001455041056033224, | |
| "kl": 0.03513137577101588, | |
| "learning_rate": 2.6932265764904494e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 7757654.0, | |
| "reward": 0.5976256132125854, | |
| "reward_std": 0.073645681142807, | |
| "rewards/reward_matching": 0.46657925844192505, | |
| "rewards/reward_object_count": 0.604015588760376, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 232 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 709.0, | |
| "completions/max_terminated_length": 709.0, | |
| "completions/mean_length": 232.578125, | |
| "completions/mean_terminated_length": 232.578125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 116.5, | |
| "grad_norm": 0.00014214629482012242, | |
| "kl": 0.026127594290301204, | |
| "learning_rate": 2.6902764207546553e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 7791163.0, | |
| "reward": 0.5486085414886475, | |
| "reward_std": 0.05609787255525589, | |
| "rewards/reward_matching": 0.4533560872077942, | |
| "rewards/reward_object_count": 0.3829742670059204, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 233 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 673.0, | |
| "completions/mean_length": 195.109375, | |
| "completions/mean_terminated_length": 181.95238095238096, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 117.0, | |
| "grad_norm": 0.0001753616234054789, | |
| "kl": 0.029080318985506892, | |
| "learning_rate": 2.6873137795824367e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 7822914.0, | |
| "reward": 0.6358436942100525, | |
| "reward_std": 0.0704498291015625, | |
| "rewards/reward_matching": 0.5291736125946045, | |
| "rewards/reward_object_count": 0.5916976928710938, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 234 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 615.0, | |
| "completions/mean_length": 183.625, | |
| "completions/mean_terminated_length": 156.51612903225808, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 117.5, | |
| "grad_norm": 0.00017552314966451377, | |
| "kl": 0.05639212904497981, | |
| "learning_rate": 2.6843386840504972e-05, | |
| "loss": -0.0011, | |
| "num_tokens": 7852330.0, | |
| "reward": 0.5928733348846436, | |
| "reward_std": 0.08490997552871704, | |
| "rewards/reward_matching": 0.47806107997894287, | |
| "rewards/reward_object_count": 0.5614335536956787, | |
| "rewards/reward_parseable": 0.96875, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 933.0, | |
| "completions/mean_length": 233.578125, | |
| "completions/mean_terminated_length": 208.08064516129033, | |
| "completions/min_length": 23.0, | |
| "completions/min_terminated_length": 23.0, | |
| "epoch": 118.0, | |
| "grad_norm": 0.00035216257674619555, | |
| "kl": 0.03552743140608072, | |
| "learning_rate": 2.6813511653661817e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 7888783.0, | |
| "reward": 0.657150149345398, | |
| "reward_std": 0.06232122331857681, | |
| "rewards/reward_matching": 0.5920723676681519, | |
| "rewards/reward_object_count": 0.525158703327179, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 236 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 919.0, | |
| "completions/max_terminated_length": 919.0, | |
| "completions/mean_length": 265.125, | |
| "completions/mean_terminated_length": 265.125, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 118.5, | |
| "grad_norm": 0.00011095030640717596, | |
| "kl": 0.020938155241310596, | |
| "learning_rate": 2.678351254867147e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 7925335.0, | |
| "reward": 0.6279448866844177, | |
| "reward_std": 0.03555089607834816, | |
| "rewards/reward_matching": 0.5564066767692566, | |
| "rewards/reward_object_count": 0.4705043435096741, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 237 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 304.0, | |
| "completions/mean_length": 141.28125, | |
| "completions/mean_terminated_length": 127.26984126984127, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 119.0, | |
| "grad_norm": 0.00019420348689891398, | |
| "kl": 0.051794532453641295, | |
| "learning_rate": 2.675338984021035e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 7953321.0, | |
| "reward": 0.6752597093582153, | |
| "reward_std": 0.06845426559448242, | |
| "rewards/reward_matching": 0.5453788042068481, | |
| "rewards/reward_object_count": 0.7401620149612427, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 238 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 935.0, | |
| "completions/mean_length": 296.734375, | |
| "completions/mean_terminated_length": 260.9672131147541, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 119.5, | |
| "grad_norm": 0.00035309739178046584, | |
| "kl": 0.045890753623098135, | |
| "learning_rate": 2.672314384425142e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 7992536.0, | |
| "reward": 0.6269891262054443, | |
| "reward_std": 0.05093158036470413, | |
| "rewards/reward_matching": 0.5626972913742065, | |
| "rewards/reward_object_count": 0.4624785780906677, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 239 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 997.0, | |
| "completions/mean_length": 227.296875, | |
| "completions/mean_terminated_length": 214.65079365079364, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 120.0, | |
| "grad_norm": 0.0002618706494104117, | |
| "kl": 0.0391268425155431, | |
| "learning_rate": 2.669277487806085e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 8027627.0, | |
| "reward": 0.6494286060333252, | |
| "reward_std": 0.09994396567344666, | |
| "rewards/reward_matching": 0.5093706846237183, | |
| "rewards/reward_object_count": 0.7346560955047607, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 733.0, | |
| "completions/mean_length": 207.140625, | |
| "completions/mean_terminated_length": 194.17460317460316, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 120.5, | |
| "grad_norm": 0.00017189256323035806, | |
| "kl": 0.03264498105272651, | |
| "learning_rate": 2.6662283260194743e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 8059188.0, | |
| "reward": 0.6138782501220703, | |
| "reward_std": 0.07262279093265533, | |
| "rewards/reward_matching": 0.49560919404029846, | |
| "rewards/reward_object_count": 0.5981885194778442, | |
| "rewards/reward_parseable": 0.984375, | |
| "step": 241 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 767.0, | |
| "completions/max_terminated_length": 767.0, | |
| "completions/mean_length": 224.9375, | |
| "completions/mean_terminated_length": 224.9375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 121.0, | |
| "grad_norm": 0.00012785749277099967, | |
| "kl": 0.019806620781309903, | |
| "learning_rate": 2.6631669310495725e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 8093488.0, | |
| "reward": 0.7159284353256226, | |
| "reward_std": 0.04454932361841202, | |
| "rewards/reward_matching": 0.6159651279449463, | |
| "rewards/reward_object_count": 0.7317466139793396, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 242 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 241.0, | |
| "completions/max_terminated_length": 241.0, | |
| "completions/mean_length": 97.21875, | |
| "completions/mean_terminated_length": 97.21875, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 121.5, | |
| "grad_norm": 0.0001221935817738995, | |
| "kl": 0.03843500721268356, | |
| "learning_rate": 2.660093335008966e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 8118974.0, | |
| "reward": 0.6205872893333435, | |
| "reward_std": 0.04494061693549156, | |
| "rewards/reward_matching": 0.5400537252426147, | |
| "rewards/reward_object_count": 0.48277533054351807, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 243 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 693.0, | |
| "completions/mean_length": 220.65625, | |
| "completions/mean_terminated_length": 207.9047619047619, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 122.0, | |
| "grad_norm": 0.00015747918223496526, | |
| "kl": 0.027920755557715893, | |
| "learning_rate": 2.6570075701382213e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 8154280.0, | |
| "reward": 0.6552270650863647, | |
| "reward_std": 0.06700208783149719, | |
| "rewards/reward_matching": 0.5130065679550171, | |
| "rewards/reward_object_count": 0.7371156215667725, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 244 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 615.0, | |
| "completions/max_terminated_length": 615.0, | |
| "completions/mean_length": 198.59375, | |
| "completions/mean_terminated_length": 198.59375, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 122.5, | |
| "grad_norm": 0.00014975345402490348, | |
| "kl": 0.028760560788214207, | |
| "learning_rate": 2.653909668805553e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 8184014.0, | |
| "reward": 0.5549274682998657, | |
| "reward_std": 0.045301198959350586, | |
| "rewards/reward_matching": 0.4701750874519348, | |
| "rewards/reward_object_count": 0.36411210894584656, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 821.0, | |
| "completions/mean_length": 225.0625, | |
| "completions/mean_terminated_length": 171.8, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 123.0, | |
| "grad_norm": 0.0065308245830237865, | |
| "kl": 0.17872803064528853, | |
| "learning_rate": 2.6507996635064792e-05, | |
| "loss": -0.0023, | |
| "num_tokens": 8217042.0, | |
| "reward": 0.6046477556228638, | |
| "reward_std": 0.1092238575220108, | |
| "rewards/reward_matching": 0.467964768409729, | |
| "rewards/reward_object_count": 0.6818444728851318, | |
| "rewards/reward_parseable": 0.9375, | |
| "step": 246 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 895.0, | |
| "completions/mean_length": 166.78125, | |
| "completions/mean_terminated_length": 153.17460317460316, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 123.5, | |
| "grad_norm": 0.00011328323307679966, | |
| "kl": 0.026759653817862272, | |
| "learning_rate": 2.647677586863484e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 8248260.0, | |
| "reward": 0.7336732149124146, | |
| "reward_std": 0.05173768848180771, | |
| "rewards/reward_matching": 0.6460912227630615, | |
| "rewards/reward_object_count": 0.7300925254821777, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 247 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 997.0, | |
| "completions/mean_length": 288.765625, | |
| "completions/mean_terminated_length": 277.0952380952381, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 124.0, | |
| "grad_norm": 0.00037040153983980417, | |
| "kl": 0.06496235262602568, | |
| "learning_rate": 2.644543471625675e-05, | |
| "loss": -0.0014, | |
| "num_tokens": 8284725.0, | |
| "reward": 0.539496898651123, | |
| "reward_std": 0.06459835916757584, | |
| "rewards/reward_matching": 0.4335413873195648, | |
| "rewards/reward_object_count": 0.4593604803085327, | |
| "rewards/reward_parseable": 0.9375, | |
| "step": 248 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 826.0, | |
| "completions/mean_length": 166.90625, | |
| "completions/mean_terminated_length": 139.25806451612902, | |
| "completions/min_length": 40.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 124.5, | |
| "grad_norm": 0.00012266320118214935, | |
| "kl": 0.027017448912374675, | |
| "learning_rate": 2.6413973506684366e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 8310511.0, | |
| "reward": 0.5934783220291138, | |
| "reward_std": 0.06606625020503998, | |
| "rewards/reward_matching": 0.47368282079696655, | |
| "rewards/reward_object_count": 0.546343207359314, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 249 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 805.0, | |
| "completions/max_terminated_length": 805.0, | |
| "completions/mean_length": 310.1875, | |
| "completions/mean_terminated_length": 310.1875, | |
| "completions/min_length": 121.0, | |
| "completions/min_terminated_length": 121.0, | |
| "epoch": 125.0, | |
| "grad_norm": 0.00016481673810631037, | |
| "kl": 0.023303369292989373, | |
| "learning_rate": 2.63823925699309e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 8351867.0, | |
| "reward": 0.6260138154029846, | |
| "reward_std": 0.06270426511764526, | |
| "rewards/reward_matching": 0.5143136978149414, | |
| "rewards/reward_object_count": 0.5871279835700989, | |
| "rewards/reward_parseable": 1.0, | |
| "step": 250 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1000, | |
| "num_input_tokens_seen": 8351867, | |
| "num_train_epochs": 500, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": true, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |