| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.998109640831758, | |
| "eval_steps": 51, | |
| "global_step": 198, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1519097222222222, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1009.0, | |
| "completions/mean_length": 318.44444783528644, | |
| "completions/mean_terminated_length": 192.69292195638022, | |
| "completions/min_length": 20.666666666666668, | |
| "completions/min_terminated_length": 20.666666666666668, | |
| "epoch": 0.045368620037807186, | |
| "grad_norm": 0.1771457940340042, | |
| "kl": 5.446871121724447e-05, | |
| "learning_rate": 4e-07, | |
| "loss": -0.0064, | |
| "num_tokens": 949568.0, | |
| "reward": 0.4211084047953288, | |
| "reward_std": 0.3868949313958486, | |
| "rewards/get_embedding_sim/mean": 0.3429833749930064, | |
| "rewards/get_embedding_sim/std": 0.06474291781584422, | |
| "rewards/reward_num_unique_chars/mean": 0.026324149842063587, | |
| "rewards/reward_num_unique_chars/std": 0.1598906268676122, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1171875, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 992.3333333333334, | |
| "completions/mean_length": 298.11285400390625, | |
| "completions/mean_terminated_length": 200.7570597330729, | |
| "completions/min_length": 9.666666666666666, | |
| "completions/min_terminated_length": 9.666666666666666, | |
| "epoch": 0.09073724007561437, | |
| "grad_norm": 0.12331758439540863, | |
| "kl": 0.00014600654443105063, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0254, | |
| "num_tokens": 1880082.0, | |
| "reward": 0.6416476865609487, | |
| "reward_std": 0.5329093436400095, | |
| "rewards/get_embedding_sim/mean": 0.36821014682451886, | |
| "rewards/get_embedding_sim/std": 0.07475950072209041, | |
| "rewards/reward_num_unique_chars/mean": 0.09220736970504124, | |
| "rewards/reward_num_unique_chars/std": 0.2722427050272624, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08680555555555558, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 970.0, | |
| "completions/mean_length": 252.9539998372396, | |
| "completions/mean_terminated_length": 180.34420776367188, | |
| "completions/min_length": 24.666666666666668, | |
| "completions/min_terminated_length": 24.666666666666668, | |
| "epoch": 0.13610586011342155, | |
| "grad_norm": 0.08608454465866089, | |
| "kl": 0.0001450727383295695, | |
| "learning_rate": 1e-06, | |
| "loss": 0.024, | |
| "num_tokens": 2753485.0, | |
| "reward": 0.5257614056269327, | |
| "reward_std": 0.4465513428052266, | |
| "rewards/get_embedding_sim/mean": 0.33044888575871784, | |
| "rewards/get_embedding_sim/std": 0.07643905778725942, | |
| "rewards/reward_num_unique_chars/mean": 0.06572048738598824, | |
| "rewards/reward_num_unique_chars/std": 0.24313671390215555, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.10763888888888888, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 980.6666666666666, | |
| "completions/mean_length": 271.1293538411458, | |
| "completions/mean_terminated_length": 180.34170532226562, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "epoch": 0.18147448015122875, | |
| "grad_norm": 0.11351985484361649, | |
| "kl": 0.000451435645421346, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0161, | |
| "num_tokens": 3655650.0, | |
| "reward": 0.5510341823101044, | |
| "reward_std": 0.5266622304916382, | |
| "rewards/get_embedding_sim/mean": 0.332284152507782, | |
| "rewards/get_embedding_sim/std": 0.07756081471840541, | |
| "rewards/reward_num_unique_chars/mean": 0.07334695508082707, | |
| "rewards/reward_num_unique_chars/std": 0.2513364603122075, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1484375, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 975.0, | |
| "completions/mean_length": 322.98785400390625, | |
| "completions/mean_terminated_length": 200.65520731608072, | |
| "completions/min_length": 12.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 0.22684310018903592, | |
| "grad_norm": 0.1555059850215912, | |
| "kl": 0.0004805326461791992, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0487, | |
| "num_tokens": 4589956.0, | |
| "reward": 0.5868227481842041, | |
| "reward_std": 0.5241502523422241, | |
| "rewards/get_embedding_sim/mean": 0.3524477581183116, | |
| "rewards/get_embedding_sim/std": 0.07752909014622371, | |
| "rewards/reward_num_unique_chars/mean": 0.0786214725424846, | |
| "rewards/reward_num_unique_chars/std": 0.2524682929118474, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09982638888888888, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 973.0, | |
| "completions/mean_length": 263.34288533528644, | |
| "completions/mean_terminated_length": 178.2960662841797, | |
| "completions/min_length": 17.666666666666668, | |
| "completions/min_terminated_length": 17.666666666666668, | |
| "epoch": 0.2722117202268431, | |
| "grad_norm": 0.15412873029708862, | |
| "kl": 0.0006418625513712565, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0529, | |
| "num_tokens": 5476095.0, | |
| "reward": 0.7471893429756165, | |
| "reward_std": 0.6821479399998983, | |
| "rewards/get_embedding_sim/mean": 0.3487518032391866, | |
| "rewards/get_embedding_sim/std": 0.07891600827376048, | |
| "rewards/reward_num_unique_chars/mean": 0.13329477856556574, | |
| "rewards/reward_num_unique_chars/std": 0.3387155433495839, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333333, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 989.6666666666666, | |
| "completions/mean_length": 239.4496612548828, | |
| "completions/mean_terminated_length": 168.3182576497396, | |
| "completions/min_length": 14.666666666666666, | |
| "completions/min_terminated_length": 14.666666666666666, | |
| "epoch": 0.31758034026465026, | |
| "grad_norm": 0.19775940477848053, | |
| "kl": 0.001989444096883138, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0232, | |
| "num_tokens": 6334901.0, | |
| "reward": 0.6757530768712362, | |
| "reward_std": 0.5965128739674886, | |
| "rewards/get_embedding_sim/mean": 0.34762802720069885, | |
| "rewards/get_embedding_sim/std": 0.061141988883415856, | |
| "rewards/reward_num_unique_chars/mean": 0.11084798475106557, | |
| "rewards/reward_num_unique_chars/std": 0.31187912821769714, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09722222222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 973.0, | |
| "completions/mean_length": 223.53906758626303, | |
| "completions/mean_terminated_length": 137.0214869181315, | |
| "completions/min_length": 16.0, | |
| "completions/min_terminated_length": 16.0, | |
| "epoch": 0.3629489603024575, | |
| "grad_norm": 0.159920796751976, | |
| "kl": 0.003296534220377604, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0371, | |
| "num_tokens": 7177202.0, | |
| "reward": 0.7903947830200195, | |
| "reward_std": 0.7087553143501282, | |
| "rewards/get_embedding_sim/mean": 0.37893640001614887, | |
| "rewards/get_embedding_sim/std": 0.07807190467913945, | |
| "rewards/reward_num_unique_chars/mean": 0.13736129055420557, | |
| "rewards/reward_num_unique_chars/std": 0.3415720462799072, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.04340277777777779, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 871.0, | |
| "completions/mean_length": 189.19965616861978, | |
| "completions/mean_terminated_length": 151.40495808919272, | |
| "completions/min_length": 14.333333333333334, | |
| "completions/min_terminated_length": 14.333333333333334, | |
| "epoch": 0.40831758034026466, | |
| "grad_norm": 0.07982576638460159, | |
| "kl": 0.006541093190511067, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0323, | |
| "num_tokens": 7978744.0, | |
| "reward": 0.9137211839358012, | |
| "reward_std": 0.718700091044108, | |
| "rewards/get_embedding_sim/mean": 0.36163782080014545, | |
| "rewards/get_embedding_sim/std": 0.0756089190642039, | |
| "rewards/reward_num_unique_chars/mean": 0.1940170923868815, | |
| "rewards/reward_num_unique_chars/std": 0.3915421764055888, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.04253472222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 950.3333333333334, | |
| "completions/mean_length": 195.49305725097656, | |
| "completions/mean_terminated_length": 158.83220418294272, | |
| "completions/min_length": 20.666666666666668, | |
| "completions/min_terminated_length": 20.666666666666668, | |
| "epoch": 0.45368620037807184, | |
| "grad_norm": 0.13737693428993225, | |
| "kl": 0.008511225382486979, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0158, | |
| "num_tokens": 8781392.0, | |
| "reward": 0.6430213848749796, | |
| "reward_std": 0.5739699502786001, | |
| "rewards/get_embedding_sim/mean": 0.3461463352044423, | |
| "rewards/get_embedding_sim/std": 0.07520903646945953, | |
| "rewards/reward_num_unique_chars/mean": 0.09946840691069762, | |
| "rewards/reward_num_unique_chars/std": 0.26744696994622547, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.045138888888888874, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 882.3333333333334, | |
| "completions/mean_length": 182.24132283528647, | |
| "completions/mean_terminated_length": 142.2494913736979, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "epoch": 0.499054820415879, | |
| "grad_norm": 0.09873297065496445, | |
| "kl": 0.012536366780598959, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0128, | |
| "num_tokens": 9570790.0, | |
| "reward": 0.6741114258766174, | |
| "reward_std": 0.6212253371874491, | |
| "rewards/get_embedding_sim/mean": 0.35900717973709106, | |
| "rewards/get_embedding_sim/std": 0.0736292873819669, | |
| "rewards/reward_num_unique_chars/mean": 0.10573149348298709, | |
| "rewards/reward_num_unique_chars/std": 0.3019101023674011, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02864583333333337, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 906.0, | |
| "completions/mean_length": 177.17447916666666, | |
| "completions/mean_terminated_length": 152.01558430989584, | |
| "completions/min_length": 13.333333333333334, | |
| "completions/min_terminated_length": 13.333333333333334, | |
| "epoch": 0.5444234404536862, | |
| "grad_norm": 0.11605791002511978, | |
| "kl": 0.025735855102539062, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0051, | |
| "num_tokens": 10355503.0, | |
| "reward": 0.5276843309402466, | |
| "reward_std": 0.502232551574707, | |
| "rewards/get_embedding_sim/mean": 0.3662259578704834, | |
| "rewards/get_embedding_sim/std": 0.08873194952805837, | |
| "rewards/reward_num_unique_chars/mean": 0.054056490461031594, | |
| "rewards/reward_num_unique_chars/std": 0.22509411970774332, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.033854166666666664, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 854.3333333333334, | |
| "completions/mean_length": 166.70486450195312, | |
| "completions/mean_terminated_length": 136.90866088867188, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "epoch": 0.5897920604914934, | |
| "grad_norm": 4.433223724365234, | |
| "kl": 0.2714697519938151, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0254, | |
| "num_tokens": 11137371.0, | |
| "reward": 1.016512393951416, | |
| "reward_std": 0.7703921596209208, | |
| "rewards/get_embedding_sim/mean": 0.3654707372188568, | |
| "rewards/get_embedding_sim/std": 0.09141946583986282, | |
| "rewards/reward_num_unique_chars/mean": 0.21773314972718558, | |
| "rewards/reward_num_unique_chars/std": 0.3968968590100606, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.028645833333333332, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 923.0, | |
| "completions/mean_length": 189.52778116861978, | |
| "completions/mean_terminated_length": 164.9041544596354, | |
| "completions/min_length": 23.333333333333332, | |
| "completions/min_terminated_length": 23.333333333333332, | |
| "epoch": 0.6351606805293005, | |
| "grad_norm": 0.09748831391334534, | |
| "kl": 0.030905405680338543, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0173, | |
| "num_tokens": 11945531.0, | |
| "reward": 0.7334451675415039, | |
| "reward_std": 0.6726242105166117, | |
| "rewards/get_embedding_sim/mean": 0.3584451178709666, | |
| "rewards/get_embedding_sim/std": 0.09209247678518295, | |
| "rewards/reward_num_unique_chars/mean": 0.12500000248352686, | |
| "rewards/reward_num_unique_chars/std": 0.32361265023549396, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02690972222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 825.6666666666666, | |
| "completions/mean_length": 168.27778116861978, | |
| "completions/mean_terminated_length": 144.8372548421224, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "epoch": 0.6805293005671077, | |
| "grad_norm": 0.0829065814614296, | |
| "kl": 0.028959910074869793, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0053, | |
| "num_tokens": 12715243.0, | |
| "reward": 0.77097487449646, | |
| "reward_std": 0.5438057780265808, | |
| "rewards/get_embedding_sim/mean": 0.3829539120197296, | |
| "rewards/get_embedding_sim/std": 0.09809910257657369, | |
| "rewards/reward_num_unique_chars/mean": 0.13059413681427637, | |
| "rewards/reward_num_unique_chars/std": 0.31896015008290607, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.022569444444444458, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 828.0, | |
| "completions/mean_length": 157.07465616861978, | |
| "completions/mean_terminated_length": 136.9731216430664, | |
| "completions/min_length": 10.666666666666666, | |
| "completions/min_terminated_length": 10.666666666666666, | |
| "epoch": 0.725897920604915, | |
| "grad_norm": 0.09397952258586884, | |
| "kl": 0.044497172037760414, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0032, | |
| "num_tokens": 13477473.0, | |
| "reward": 0.8347963392734528, | |
| "reward_std": 0.6062483191490173, | |
| "rewards/get_embedding_sim/mean": 0.3972962299982707, | |
| "rewards/get_embedding_sim/std": 0.10549474010864894, | |
| "rewards/reward_num_unique_chars/mean": 0.14635550851623216, | |
| "rewards/reward_num_unique_chars/std": 0.3286245862642924, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.7712665406427222, | |
| "grad_norm": 1.0158724784851074, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0131, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.7712665406427222, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.1023065476190476, | |
| "eval_completions/max_length": 850.0357142857143, | |
| "eval_completions/max_terminated_length": 649.3214285714286, | |
| "eval_completions/mean_length": 221.02716418675013, | |
| "eval_completions/mean_terminated_length": 136.32026665551322, | |
| "eval_completions/min_length": 25.892857142857142, | |
| "eval_completions/min_terminated_length": 25.892857142857142, | |
| "eval_kl": 0.05330167497907366, | |
| "eval_loss": 0.022182755172252655, | |
| "eval_num_tokens": 14225380.0, | |
| "eval_reward": 0.6993682932640825, | |
| "eval_reward_std": 0.6098802514108164, | |
| "eval_rewards/get_embedding_sim/mean": 0.408073626724737, | |
| "eval_rewards/get_embedding_sim/std": 0.08180103763671857, | |
| "eval_rewards/reward_num_unique_chars/mean": 0.09725111100955733, | |
| "eval_rewards/reward_num_unique_chars/std": 0.1932054047605821, | |
| "eval_runtime": 6593.9311, | |
| "eval_samples_per_second": 0.008, | |
| "eval_steps_per_second": 0.0, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.031249999999999983, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 923.5, | |
| "completions/mean_length": 173.74523162841797, | |
| "completions/mean_terminated_length": 146.51427459716797, | |
| "completions/min_length": 12.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 0.8166351606805293, | |
| "grad_norm": 0.13796095550060272, | |
| "kl": 0.061681111653645836, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0132, | |
| "num_tokens": 15035350.0, | |
| "reward": 0.7754727999369303, | |
| "reward_std": 0.6537116318941116, | |
| "rewards/get_embedding_sim/mean": 0.3978685835997264, | |
| "rewards/get_embedding_sim/std": 0.10740451887249947, | |
| "rewards/reward_num_unique_chars/mean": 0.1262344146768252, | |
| "rewards/reward_num_unique_chars/std": 0.31391797463099164, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.032986111111111126, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 895.6666666666666, | |
| "completions/mean_length": 179.25955708821616, | |
| "completions/mean_terminated_length": 150.37726338704428, | |
| "completions/min_length": 9.666666666666666, | |
| "completions/min_terminated_length": 9.666666666666666, | |
| "epoch": 0.8620037807183365, | |
| "grad_norm": 9.711634635925293, | |
| "kl": 0.30323028564453125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0171, | |
| "num_tokens": 15831681.0, | |
| "reward": 0.8330511649449667, | |
| "reward_std": 0.7141762177149454, | |
| "rewards/get_embedding_sim/mean": 0.392946978410085, | |
| "rewards/get_embedding_sim/std": 0.10180553545554479, | |
| "rewards/reward_num_unique_chars/mean": 0.14735475679238638, | |
| "rewards/reward_num_unique_chars/std": 0.34930500388145447, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03472222222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 920.3333333333334, | |
| "completions/mean_length": 175.86719258626303, | |
| "completions/mean_terminated_length": 145.65855916341147, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "epoch": 0.9073724007561437, | |
| "grad_norm": 0.11200369894504547, | |
| "kl": 0.056910196940104164, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0094, | |
| "num_tokens": 16616760.0, | |
| "reward": 0.9081356525421143, | |
| "reward_std": 0.7194747726122538, | |
| "rewards/get_embedding_sim/mean": 0.40292728940645856, | |
| "rewards/get_embedding_sim/std": 0.11402523269255956, | |
| "rewards/reward_num_unique_chars/mean": 0.17317021762331328, | |
| "rewards/reward_num_unique_chars/std": 0.34040839473406476, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.013888888888888914, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 768.3333333333334, | |
| "completions/mean_length": 162.06250508626303, | |
| "completions/mean_terminated_length": 149.80463155110678, | |
| "completions/min_length": 12.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 0.9527410207939508, | |
| "grad_norm": 0.08261118829250336, | |
| "kl": 0.08898417154947917, | |
| "learning_rate": 1e-06, | |
| "loss": 0.012, | |
| "num_tokens": 17393280.0, | |
| "reward": 0.8178274830182394, | |
| "reward_std": 0.7919754783312479, | |
| "rewards/get_embedding_sim/mean": 0.4141816198825836, | |
| "rewards/get_embedding_sim/std": 0.11467475444078445, | |
| "rewards/reward_num_unique_chars/mean": 0.13454861069718996, | |
| "rewards/reward_num_unique_chars/std": 0.33077992002169293, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0385656130268199, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 884.3333333333334, | |
| "completions/mean_length": 183.8406778971354, | |
| "completions/mean_terminated_length": 149.68450419108072, | |
| "completions/min_length": 9.333333333333334, | |
| "completions/min_terminated_length": 9.333333333333334, | |
| "epoch": 0.998109640831758, | |
| "grad_norm": 0.07706479728221893, | |
| "kl": 0.040013631184895836, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0204, | |
| "num_tokens": 18173273.0, | |
| "reward": 0.9729219675064087, | |
| "reward_std": 0.8207030693689982, | |
| "rewards/get_embedding_sim/mean": 0.4208385944366455, | |
| "rewards/get_embedding_sim/std": 0.11511148760716121, | |
| "rewards/reward_num_unique_chars/mean": 0.18582184116045633, | |
| "rewards/reward_num_unique_chars/std": 0.3880065679550171, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.036458333333333336, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 975.0, | |
| "completions/mean_length": 182.76996866861978, | |
| "completions/mean_terminated_length": 150.8812713623047, | |
| "completions/min_length": 8.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 1.0453686200378072, | |
| "grad_norm": 0.12481274455785751, | |
| "kl": 0.06750742594401042, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0078, | |
| "num_tokens": 18973648.0, | |
| "reward": 0.7922607262929281, | |
| "reward_std": 0.778564453125, | |
| "rewards/get_embedding_sim/mean": 0.43028150995572406, | |
| "rewards/get_embedding_sim/std": 0.11360271523396175, | |
| "rewards/reward_num_unique_chars/mean": 0.12104393541812897, | |
| "rewards/reward_num_unique_chars/std": 0.3201603094736735, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.027777777777777752, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 924.0, | |
| "completions/mean_length": 164.88194529215494, | |
| "completions/mean_terminated_length": 140.49947357177734, | |
| "completions/min_length": 10.333333333333334, | |
| "completions/min_terminated_length": 10.333333333333334, | |
| "epoch": 1.0907372400756143, | |
| "grad_norm": 0.27184560894966125, | |
| "kl": 0.10882568359375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0144, | |
| "num_tokens": 19738968.0, | |
| "reward": 0.9275963107744852, | |
| "reward_std": 0.841428816318512, | |
| "rewards/get_embedding_sim/mean": 0.4406171242396037, | |
| "rewards/get_embedding_sim/std": 0.11815810203552246, | |
| "rewards/reward_num_unique_chars/mean": 0.162567267815272, | |
| "rewards/reward_num_unique_chars/std": 0.36787914236386615, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.035590277777777755, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 858.0, | |
| "completions/mean_length": 176.44878641764322, | |
| "completions/mean_terminated_length": 145.19543965657553, | |
| "completions/min_length": 8.333333333333334, | |
| "completions/min_terminated_length": 8.333333333333334, | |
| "epoch": 1.1361058601134215, | |
| "grad_norm": 0.11772840470075607, | |
| "kl": 0.132965087890625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0071, | |
| "num_tokens": 20526877.0, | |
| "reward": 0.7764408787091573, | |
| "reward_std": 0.7020115653673807, | |
| "rewards/get_embedding_sim/mean": 0.4378991524378459, | |
| "rewards/get_embedding_sim/std": 0.11236891647179921, | |
| "rewards/reward_num_unique_chars/mean": 0.11354367559154828, | |
| "rewards/reward_num_unique_chars/std": 0.31067532300949097, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.030381944444444458, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 938.6666666666666, | |
| "completions/mean_length": 181.06771341959634, | |
| "completions/mean_terminated_length": 154.67694600423178, | |
| "completions/min_length": 10.333333333333334, | |
| "completions/min_terminated_length": 10.333333333333334, | |
| "epoch": 1.1814744801512287, | |
| "grad_norm": 0.21804682910442352, | |
| "kl": 0.09952545166015625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0079, | |
| "num_tokens": 21325291.0, | |
| "reward": 0.8087505102157593, | |
| "reward_std": 0.7311090230941772, | |
| "rewards/get_embedding_sim/mean": 0.4597921272118886, | |
| "rewards/get_embedding_sim/std": 0.12011716266473134, | |
| "rewards/reward_num_unique_chars/mean": 0.11631944527228673, | |
| "rewards/reward_num_unique_chars/std": 0.3020235498746236, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.028645833333333332, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1007.3333333333334, | |
| "completions/mean_length": 206.53125508626303, | |
| "completions/mean_terminated_length": 182.36050415039062, | |
| "completions/min_length": 9.666666666666666, | |
| "completions/min_terminated_length": 9.666666666666666, | |
| "epoch": 1.2268431001890359, | |
| "grad_norm": 0.1415005475282669, | |
| "kl": 0.170013427734375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0191, | |
| "num_tokens": 22125439.0, | |
| "reward": 0.8599557876586914, | |
| "reward_std": 0.6892009973526001, | |
| "rewards/get_embedding_sim/mean": 0.45630990465482074, | |
| "rewards/get_embedding_sim/std": 0.11122701565424602, | |
| "rewards/reward_num_unique_chars/mean": 0.1345486119389534, | |
| "rewards/reward_num_unique_chars/std": 0.3128484884897868, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.039930555555555546, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 938.6666666666666, | |
| "completions/mean_length": 214.3307342529297, | |
| "completions/mean_terminated_length": 180.5359090169271, | |
| "completions/min_length": 12.666666666666666, | |
| "completions/min_terminated_length": 12.666666666666666, | |
| "epoch": 1.272211720226843, | |
| "grad_norm": 0.11570374667644501, | |
| "kl": 0.06879933675130208, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0228, | |
| "num_tokens": 22955116.0, | |
| "reward": 0.9702663818995158, | |
| "reward_std": 0.7755107680956522, | |
| "rewards/get_embedding_sim/mean": 0.47287049889564514, | |
| "rewards/get_embedding_sim/std": 0.11713164548079173, | |
| "rewards/reward_num_unique_chars/mean": 0.16612045466899872, | |
| "rewards/reward_num_unique_chars/std": 0.3707600136597951, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.032986111111111126, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 982.0, | |
| "completions/mean_length": 191.13976033528647, | |
| "completions/mean_terminated_length": 162.73322041829428, | |
| "completions/min_length": 7.666666666666667, | |
| "completions/min_terminated_length": 7.666666666666667, | |
| "epoch": 1.3175803402646502, | |
| "grad_norm": 0.19582344591617584, | |
| "kl": 0.08817799886067708, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0186, | |
| "num_tokens": 23758269.0, | |
| "reward": 0.8809124827384949, | |
| "reward_std": 0.7492716908454895, | |
| "rewards/get_embedding_sim/mean": 0.4720582564671834, | |
| "rewards/get_embedding_sim/std": 0.11799828956524532, | |
| "rewards/reward_num_unique_chars/mean": 0.13729924211899439, | |
| "rewards/reward_num_unique_chars/std": 0.3396035333474477, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.032118055555555546, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 898.6666666666666, | |
| "completions/mean_length": 155.8697967529297, | |
| "completions/mean_terminated_length": 127.03400421142578, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 1.3629489603024574, | |
| "grad_norm": 0.10882719606161118, | |
| "kl": 0.08435567220052083, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0091, | |
| "num_tokens": 24522615.0, | |
| "reward": 0.9096565643946329, | |
| "reward_std": 0.7040959596633911, | |
| "rewards/get_embedding_sim/mean": 0.4825731615225474, | |
| "rewards/get_embedding_sim/std": 0.11133117477099101, | |
| "rewards/reward_num_unique_chars/mean": 0.1426701620221138, | |
| "rewards/reward_num_unique_chars/std": 0.3476703961690267, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.028645833333333332, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 879.3333333333334, | |
| "completions/mean_length": 171.0026092529297, | |
| "completions/mean_terminated_length": 145.80119832356772, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 1.4083175803402646, | |
| "grad_norm": 0.16034463047981262, | |
| "kl": 0.0664825439453125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0173, | |
| "num_tokens": 25303194.0, | |
| "reward": 1.1405272086461384, | |
| "reward_std": 0.8232053716977438, | |
| "rewards/get_embedding_sim/mean": 0.4634438355763753, | |
| "rewards/get_embedding_sim/std": 0.11458807935317357, | |
| "rewards/reward_num_unique_chars/mean": 0.22588256498177847, | |
| "rewards/reward_num_unique_chars/std": 0.4179500639438629, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.038194444444444454, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 954.3333333333334, | |
| "completions/mean_length": 197.03039042154947, | |
| "completions/mean_terminated_length": 164.30577087402344, | |
| "completions/min_length": 8.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 1.4536862003780717, | |
| "grad_norm": 1.1371827125549316, | |
| "kl": 0.16266377766927084, | |
| "learning_rate": 1e-06, | |
| "loss": 0.018, | |
| "num_tokens": 26107613.0, | |
| "reward": 0.8050010005633036, | |
| "reward_std": 0.7417031327883402, | |
| "rewards/get_embedding_sim/mean": 0.4768759409586589, | |
| "rewards/get_embedding_sim/std": 0.1163704867164294, | |
| "rewards/reward_num_unique_chars/mean": 0.10968360553185146, | |
| "rewards/reward_num_unique_chars/std": 0.29880866408348083, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.037326388888888916, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1002.6666666666666, | |
| "completions/mean_length": 184.1883748372396, | |
| "completions/mean_terminated_length": 151.9073689778646, | |
| "completions/min_length": 7.666666666666667, | |
| "completions/min_terminated_length": 7.666666666666667, | |
| "epoch": 1.499054820415879, | |
| "grad_norm": 0.08430308103561401, | |
| "kl": 0.07194010416666667, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0169, | |
| "num_tokens": 26899254.0, | |
| "reward": 0.8056914011637369, | |
| "reward_std": 0.7580650448799133, | |
| "rewards/get_embedding_sim/mean": 0.48798303802808124, | |
| "rewards/get_embedding_sim/std": 0.11710481345653534, | |
| "rewards/reward_num_unique_chars/mean": 0.106216366092364, | |
| "rewards/reward_num_unique_chars/std": 0.3037123878796895, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.544423440453686, | |
| "grad_norm": 0.11377694457769394, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0254, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.544423440453686, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.10007440476190474, | |
| "eval_completions/max_length": 869.5892857142857, | |
| "eval_completions/max_terminated_length": 639.0535714285714, | |
| "eval_completions/mean_length": 225.3545457976205, | |
| "eval_completions/mean_terminated_length": 140.344126360757, | |
| "eval_completions/min_length": 19.357142857142858, | |
| "eval_completions/min_terminated_length": 19.357142857142858, | |
| "eval_kl": 0.07553209577287946, | |
| "eval_loss": 0.037391725927591324, | |
| "eval_num_tokens": 27703933.0, | |
| "eval_reward": 0.7799429536930153, | |
| "eval_reward_std": 0.6953434666751751, | |
| "eval_rewards/get_embedding_sim/mean": 0.47748757898807526, | |
| "eval_rewards/get_embedding_sim/std": 0.0975222562971924, | |
| "eval_rewards/reward_num_unique_chars/mean": 0.10085803000921649, | |
| "eval_rewards/reward_num_unique_chars/std": 0.22144863993993827, | |
| "eval_runtime": 5743.3373, | |
| "eval_samples_per_second": 0.01, | |
| "eval_steps_per_second": 0.0, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0390625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 905.3333333333334, | |
| "completions/mean_length": 192.20443216959634, | |
| "completions/mean_terminated_length": 158.34148915608725, | |
| "completions/min_length": 6.0, | |
| "completions/min_terminated_length": 6.0, | |
| "epoch": 1.5897920604914932, | |
| "grad_norm": 0.14566491544246674, | |
| "kl": 0.0706634521484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0208, | |
| "num_tokens": 28512525.0, | |
| "reward": 0.8942790528138479, | |
| "reward_std": 0.7560157477855682, | |
| "rewards/get_embedding_sim/mean": 0.47891440490881604, | |
| "rewards/get_embedding_sim/std": 0.11716391022006671, | |
| "rewards/reward_num_unique_chars/mean": 0.1388231466213862, | |
| "rewards/reward_num_unique_chars/std": 0.3291383981704712, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03472222222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 964.0, | |
| "completions/mean_length": 205.2352498372396, | |
| "completions/mean_terminated_length": 176.03035990397134, | |
| "completions/min_length": 5.333333333333333, | |
| "completions/min_terminated_length": 5.333333333333333, | |
| "epoch": 1.6351606805293004, | |
| "grad_norm": 0.08621126413345337, | |
| "kl": 0.08469390869140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.027, | |
| "num_tokens": 29338780.0, | |
| "reward": 0.8891541957855225, | |
| "reward_std": 0.8206586241722107, | |
| "rewards/get_embedding_sim/mean": 0.4672791560490926, | |
| "rewards/get_embedding_sim/std": 0.11891171584526698, | |
| "rewards/reward_num_unique_chars/mean": 0.14074058582385382, | |
| "rewards/reward_num_unique_chars/std": 0.34693758686383563, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.032118055555555546, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 932.0, | |
| "completions/mean_length": 195.6493123372396, | |
| "completions/mean_terminated_length": 168.39810689290366, | |
| "completions/min_length": 8.333333333333334, | |
| "completions/min_terminated_length": 8.333333333333334, | |
| "epoch": 1.6805293005671076, | |
| "grad_norm": 0.09061074256896973, | |
| "kl": 0.061197916666666664, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0088, | |
| "num_tokens": 30140024.0, | |
| "reward": 0.8708882729212443, | |
| "reward_std": 0.6558753848075867, | |
| "rewards/get_embedding_sim/mean": 0.49067989985148114, | |
| "rewards/get_embedding_sim/std": 0.11308762182792027, | |
| "rewards/reward_num_unique_chars/mean": 0.12729256972670555, | |
| "rewards/reward_num_unique_chars/std": 0.3063565840323766, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.028645833333333332, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 971.3333333333334, | |
| "completions/mean_length": 168.0746612548828, | |
| "completions/mean_terminated_length": 142.76580810546875, | |
| "completions/min_length": 9.666666666666666, | |
| "completions/min_terminated_length": 9.666666666666666, | |
| "epoch": 1.725897920604915, | |
| "grad_norm": 0.07783554494380951, | |
| "kl": 0.0714569091796875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0222, | |
| "num_tokens": 30914926.0, | |
| "reward": 0.9490655660629272, | |
| "reward_std": 0.8103155891100565, | |
| "rewards/get_embedding_sim/mean": 0.4881279369195302, | |
| "rewards/get_embedding_sim/std": 0.1105448305606842, | |
| "rewards/reward_num_unique_chars/mean": 0.1541931927204132, | |
| "rewards/reward_num_unique_chars/std": 0.3540232678254445, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.036458333333333336, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 918.3333333333334, | |
| "completions/mean_length": 165.41146341959634, | |
| "completions/mean_terminated_length": 132.9504165649414, | |
| "completions/min_length": 7.666666666666667, | |
| "completions/min_terminated_length": 7.666666666666667, | |
| "epoch": 1.7712665406427222, | |
| "grad_norm": 0.08974138647317886, | |
| "kl": 0.08209737141927083, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0198, | |
| "num_tokens": 31681096.0, | |
| "reward": 0.8335268894831339, | |
| "reward_std": 0.716159999370575, | |
| "rewards/get_embedding_sim/mean": 0.4897768298784892, | |
| "rewards/get_embedding_sim/std": 0.12030263990163803, | |
| "rewards/reward_num_unique_chars/mean": 0.11484397575259209, | |
| "rewards/reward_num_unique_chars/std": 0.30110697944959003, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.040798611111111084, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 938.6666666666666, | |
| "completions/mean_length": 202.7447967529297, | |
| "completions/mean_terminated_length": 167.81108601888022, | |
| "completions/min_length": 9.0, | |
| "completions/min_terminated_length": 9.0, | |
| "epoch": 1.8166351606805293, | |
| "grad_norm": 0.10199436545372009, | |
| "kl": 0.07517751057942708, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0319, | |
| "num_tokens": 32496610.0, | |
| "reward": 0.860044519106547, | |
| "reward_std": 0.7602864901224772, | |
| "rewards/get_embedding_sim/mean": 0.4876486460367839, | |
| "rewards/get_embedding_sim/std": 0.11599687735239665, | |
| "rewards/reward_num_unique_chars/mean": 0.12465803200999896, | |
| "rewards/reward_num_unique_chars/std": 0.3174656927585602, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1003.0, | |
| "completions/mean_length": 187.55816141764322, | |
| "completions/mean_terminated_length": 160.59460957845053, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 1.8620037807183365, | |
| "grad_norm": 1.411887526512146, | |
| "kl": 0.13691202799479166, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0295, | |
| "num_tokens": 33302501.0, | |
| "reward": 0.8890740275382996, | |
| "reward_std": 0.8625878095626831, | |
| "rewards/get_embedding_sim/mean": 0.4724073112010956, | |
| "rewards/get_embedding_sim/std": 0.11733246843020122, | |
| "rewards/reward_num_unique_chars/mean": 0.13933624823888144, | |
| "rewards/reward_num_unique_chars/std": 0.34191163380940753, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025173611111111088, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 950.0, | |
| "completions/mean_length": 171.25347900390625, | |
| "completions/mean_terminated_length": 149.3338419596354, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 1.9073724007561437, | |
| "grad_norm": 0.10643448680639267, | |
| "kl": 0.0879974365234375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0193, | |
| "num_tokens": 34082265.0, | |
| "reward": 1.0310540199279785, | |
| "reward_std": 0.83027583360672, | |
| "rewards/get_embedding_sim/mean": 0.4763664702574412, | |
| "rewards/get_embedding_sim/std": 0.1186542958021164, | |
| "rewards/reward_num_unique_chars/mean": 0.1909722164273262, | |
| "rewards/reward_num_unique_chars/std": 0.3733387490113576, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.020833333333333332, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1008.3333333333334, | |
| "completions/mean_length": 175.5920206705729, | |
| "completions/mean_terminated_length": 157.43394470214844, | |
| "completions/min_length": 7.666666666666667, | |
| "completions/min_terminated_length": 7.666666666666667, | |
| "epoch": 1.9527410207939508, | |
| "grad_norm": 0.083248071372509, | |
| "kl": 0.07168070475260417, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0234, | |
| "num_tokens": 34874371.0, | |
| "reward": 0.9281045397122701, | |
| "reward_std": 0.9109238783518473, | |
| "rewards/get_embedding_sim/mean": 0.4853961269060771, | |
| "rewards/get_embedding_sim/std": 0.12320189674695332, | |
| "rewards/reward_num_unique_chars/mean": 0.14780289431413016, | |
| "rewards/reward_num_unique_chars/std": 0.35039229194323224, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.026041666666666668, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 911.6666666666666, | |
| "completions/mean_length": 177.00694783528647, | |
| "completions/mean_terminated_length": 154.32052103678384, | |
| "completions/min_length": 9.0, | |
| "completions/min_terminated_length": 9.0, | |
| "epoch": 1.998109640831758, | |
| "grad_norm": 0.0692070946097374, | |
| "kl": 0.07420603434244792, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0336, | |
| "num_tokens": 35655291.0, | |
| "reward": 1.0533938805262248, | |
| "reward_std": 0.9196257392565409, | |
| "rewards/get_embedding_sim/mean": 0.4934980074564616, | |
| "rewards/get_embedding_sim/std": 0.11738153547048569, | |
| "rewards/reward_num_unique_chars/mean": 0.1887365331252416, | |
| "rewards/reward_num_unique_chars/std": 0.3903753161430359, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.028645833333333332, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 963.3333333333334, | |
| "completions/mean_length": 180.38281758626303, | |
| "completions/mean_terminated_length": 155.4217987060547, | |
| "completions/min_length": 7.666666666666667, | |
| "completions/min_terminated_length": 7.666666666666667, | |
| "epoch": 2.045368620037807, | |
| "grad_norm": 0.1576370894908905, | |
| "kl": 0.11295064290364583, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0335, | |
| "num_tokens": 36452916.0, | |
| "reward": 0.9172398447990417, | |
| "reward_std": 0.9673983256022135, | |
| "rewards/get_embedding_sim/mean": 0.48755229512850445, | |
| "rewards/get_embedding_sim/std": 0.11796744416157405, | |
| "rewards/reward_num_unique_chars/mean": 0.14384527256091437, | |
| "rewards/reward_num_unique_chars/std": 0.3497835397720337, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.026909722222222248, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 972.3333333333334, | |
| "completions/mean_length": 170.28646341959634, | |
| "completions/mean_terminated_length": 146.62708536783853, | |
| "completions/min_length": 5.333333333333333, | |
| "completions/min_terminated_length": 5.333333333333333, | |
| "epoch": 2.0907372400756143, | |
| "grad_norm": 0.5436683893203735, | |
| "kl": 0.1591796875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0342, | |
| "num_tokens": 37224462.0, | |
| "reward": 1.0363986889521282, | |
| "reward_std": 0.9765956203142802, | |
| "rewards/get_embedding_sim/mean": 0.5051485598087311, | |
| "rewards/get_embedding_sim/std": 0.11746565749247868, | |
| "rewards/reward_num_unique_chars/mean": 0.17818759878476462, | |
| "rewards/reward_num_unique_chars/std": 0.3826761841773987, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.038194444444444454, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 928.0, | |
| "completions/mean_length": 174.79948933919272, | |
| "completions/mean_terminated_length": 140.99752298990884, | |
| "completions/min_length": 8.333333333333334, | |
| "completions/min_terminated_length": 8.333333333333334, | |
| "epoch": 2.1361058601134215, | |
| "grad_norm": 0.11291619390249252, | |
| "kl": 0.13877360026041666, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0321, | |
| "num_tokens": 38010471.0, | |
| "reward": 0.936789353688558, | |
| "reward_std": 0.8961972991625468, | |
| "rewards/get_embedding_sim/mean": 0.49408095081647235, | |
| "rewards/get_embedding_sim/std": 0.11985934029022853, | |
| "rewards/reward_num_unique_chars/mean": 0.14794171353181204, | |
| "rewards/reward_num_unique_chars/std": 0.3542039096355438, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.055555555555555546, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 905.0, | |
| "completions/mean_length": 189.78819783528647, | |
| "completions/mean_terminated_length": 140.69151306152344, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 2.1814744801512287, | |
| "grad_norm": 0.12472045421600342, | |
| "kl": 0.2596638997395833, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0503, | |
| "num_tokens": 38811923.0, | |
| "reward": 1.0600279172261555, | |
| "reward_std": 0.9520064989725748, | |
| "rewards/get_embedding_sim/mean": 0.5053403675556183, | |
| "rewards/get_embedding_sim/std": 0.12481692930062611, | |
| "rewards/reward_num_unique_chars/mean": 0.18512474993864694, | |
| "rewards/reward_num_unique_chars/std": 0.3818445106347402, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03559027777777779, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 887.3333333333334, | |
| "completions/mean_length": 186.22656758626303, | |
| "completions/mean_terminated_length": 155.0603485107422, | |
| "completions/min_length": 8.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 2.226843100189036, | |
| "grad_norm": 0.2647012174129486, | |
| "kl": 0.14057413736979166, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0445, | |
| "num_tokens": 39616280.0, | |
| "reward": 1.021846095720927, | |
| "reward_std": 1.002595583597819, | |
| "rewards/get_embedding_sim/mean": 0.48799189925193787, | |
| "rewards/get_embedding_sim/std": 0.12229083478450775, | |
| "rewards/reward_num_unique_chars/mean": 0.17855327824751535, | |
| "rewards/reward_num_unique_chars/std": 0.3721735179424286, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01996527777777779, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 975.6666666666666, | |
| "completions/mean_length": 152.78299458821616, | |
| "completions/mean_terminated_length": 135.05128479003906, | |
| "completions/min_length": 7.666666666666667, | |
| "completions/min_terminated_length": 7.666666666666667, | |
| "epoch": 2.272211720226843, | |
| "grad_norm": 0.07731039077043533, | |
| "kl": 0.15080769856770834, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0334, | |
| "num_tokens": 40382110.0, | |
| "reward": 1.0974433422088623, | |
| "reward_std": 0.9521243373552958, | |
| "rewards/get_embedding_sim/mean": 0.516714076201121, | |
| "rewards/get_embedding_sim/std": 0.12718145549297333, | |
| "rewards/reward_num_unique_chars/mean": 0.1947579632202784, | |
| "rewards/reward_num_unique_chars/std": 0.3964957594871521, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.31758034026465, | |
| "grad_norm": 0.9586585760116577, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0347, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 2.31758034026465, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.07328869047619047, | |
| "eval_completions/max_length": 872.6071428571429, | |
| "eval_completions/max_terminated_length": 616.6071428571429, | |
| "eval_completions/mean_length": 177.7972524847303, | |
| "eval_completions/mean_terminated_length": 111.63820842334202, | |
| "eval_completions/min_length": 14.25, | |
| "eval_completions/min_terminated_length": 14.25, | |
| "eval_kl": 0.15661403111049108, | |
| "eval_loss": 0.05365554988384247, | |
| "eval_num_tokens": 41144925.0, | |
| "eval_reward": 1.0100113941090447, | |
| "eval_reward_std": 0.9766364488750696, | |
| "eval_rewards/get_embedding_sim/mean": 0.5055470722062247, | |
| "eval_rewards/get_embedding_sim/std": 0.10236791674313801, | |
| "eval_rewards/reward_num_unique_chars/mean": 0.16864551766775548, | |
| "eval_rewards/reward_num_unique_chars/std": 0.3155075231833117, | |
| "eval_runtime": 5610.1574, | |
| "eval_samples_per_second": 0.01, | |
| "eval_steps_per_second": 0.0, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.028211805555555563, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 850.1666666666666, | |
| "completions/mean_length": 165.75738271077475, | |
| "completions/mean_terminated_length": 140.97229131062826, | |
| "completions/min_length": 6.666666666666667, | |
| "completions/min_terminated_length": 6.666666666666667, | |
| "epoch": 2.3629489603024574, | |
| "grad_norm": 0.13255949318408966, | |
| "kl": 0.1971893310546875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.037, | |
| "num_tokens": 41941119.0, | |
| "reward": 1.0157166123390198, | |
| "reward_std": 0.9526964128017426, | |
| "rewards/get_embedding_sim/mean": 0.5066019793351492, | |
| "rewards/get_embedding_sim/std": 0.11097632969419162, | |
| "rewards/reward_num_unique_chars/mean": 0.17024830107887587, | |
| "rewards/reward_num_unique_chars/std": 0.37277790407339734, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.026909722222222248, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 911.0, | |
| "completions/mean_length": 145.26649729410806, | |
| "completions/mean_terminated_length": 121.1592280069987, | |
| "completions/min_length": 6.0, | |
| "completions/min_terminated_length": 6.0, | |
| "epoch": 2.4083175803402646, | |
| "grad_norm": 0.10361862182617188, | |
| "kl": 0.1372528076171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0383, | |
| "num_tokens": 42690418.0, | |
| "reward": 1.2976791461308796, | |
| "reward_std": 1.1292773286501567, | |
| "rewards/get_embedding_sim/mean": 0.5060124099254608, | |
| "rewards/get_embedding_sim/std": 0.11317289372285207, | |
| "rewards/reward_num_unique_chars/mean": 0.26442377765973407, | |
| "rewards/reward_num_unique_chars/std": 0.43167150020599365, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.026909722222222248, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 838.3333333333334, | |
| "completions/mean_length": 145.79688008626303, | |
| "completions/mean_terminated_length": 121.46813710530598, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 2.4536862003780717, | |
| "grad_norm": 0.0873284786939621, | |
| "kl": 0.17525736490885416, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0382, | |
| "num_tokens": 43439080.0, | |
| "reward": 1.166001319885254, | |
| "reward_std": 0.998184601465861, | |
| "rewards/get_embedding_sim/mean": 0.5045428971449534, | |
| "rewards/get_embedding_sim/std": 0.12551463643709818, | |
| "rewards/reward_num_unique_chars/mean": 0.22076034545898438, | |
| "rewards/reward_num_unique_chars/std": 0.40754825870196026, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.028645833333333332, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 890.0, | |
| "completions/mean_length": 171.04340616861978, | |
| "completions/mean_terminated_length": 145.85011291503906, | |
| "completions/min_length": 8.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 2.499054820415879, | |
| "grad_norm": 0.0893421620130539, | |
| "kl": 0.1564788818359375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.039, | |
| "num_tokens": 44225946.0, | |
| "reward": 1.0508646965026855, | |
| "reward_std": 1.0311030149459839, | |
| "rewards/get_embedding_sim/mean": 0.49878130356470746, | |
| "rewards/get_embedding_sim/std": 0.12252787003914516, | |
| "rewards/reward_num_unique_chars/mean": 0.18418416877587637, | |
| "rewards/reward_num_unique_chars/std": 0.38810135920842487, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02256944444444442, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 859.3333333333334, | |
| "completions/mean_length": 149.57465616861978, | |
| "completions/mean_terminated_length": 129.38720703125, | |
| "completions/min_length": 5.333333333333333, | |
| "completions/min_terminated_length": 5.333333333333333, | |
| "epoch": 2.544423440453686, | |
| "grad_norm": 0.11376336216926575, | |
| "kl": 0.17649332682291666, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0415, | |
| "num_tokens": 44975264.0, | |
| "reward": 1.2923760414123535, | |
| "reward_std": 1.15834375222524, | |
| "rewards/get_embedding_sim/mean": 0.5293551087379456, | |
| "rewards/get_embedding_sim/std": 0.12636979669332504, | |
| "rewards/reward_num_unique_chars/mean": 0.255620613694191, | |
| "rewards/reward_num_unique_chars/std": 0.4349779784679413, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.021701388888888878, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 920.6666666666666, | |
| "completions/mean_length": 151.2604217529297, | |
| "completions/mean_terminated_length": 131.89202372233072, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 2.5897920604914932, | |
| "grad_norm": 0.1699657291173935, | |
| "kl": 0.176025390625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0416, | |
| "num_tokens": 45727292.0, | |
| "reward": 1.1844958066940308, | |
| "reward_std": 1.0816868146260579, | |
| "rewards/get_embedding_sim/mean": 0.5048082073529562, | |
| "rewards/get_embedding_sim/std": 0.11827733864386876, | |
| "rewards/reward_num_unique_chars/mean": 0.22690473993619284, | |
| "rewards/reward_num_unique_chars/std": 0.41608301798502606, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.013888888888888876, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 993.0, | |
| "completions/mean_length": 140.05816650390625, | |
| "completions/mean_terminated_length": 127.6211166381836, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 2.6351606805293004, | |
| "grad_norm": 0.08062685281038284, | |
| "kl": 0.240142822265625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0377, | |
| "num_tokens": 46478463.0, | |
| "reward": 1.0848047733306885, | |
| "reward_std": 1.0851068099339802, | |
| "rewards/get_embedding_sim/mean": 0.5197005073229471, | |
| "rewards/get_embedding_sim/std": 0.11266261339187622, | |
| "rewards/reward_num_unique_chars/mean": 0.1886785626411438, | |
| "rewards/reward_num_unique_chars/std": 0.39136550823847455, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025173611111111088, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 857.0, | |
| "completions/mean_length": 142.6154530843099, | |
| "completions/mean_terminated_length": 119.86089833577473, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 2.6805293005671076, | |
| "grad_norm": 0.1051354631781578, | |
| "kl": 0.22475179036458334, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0389, | |
| "num_tokens": 47225476.0, | |
| "reward": 1.142371932665507, | |
| "reward_std": 1.070401946703593, | |
| "rewards/get_embedding_sim/mean": 0.49914271632830304, | |
| "rewards/get_embedding_sim/std": 0.11159212638934453, | |
| "rewards/reward_num_unique_chars/mean": 0.2148823787768682, | |
| "rewards/reward_num_unique_chars/std": 0.40812622507413227, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.018229166666666668, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 900.0, | |
| "completions/mean_length": 128.2951431274414, | |
| "completions/mean_terminated_length": 111.62453969319661, | |
| "completions/min_length": 7.333333333333333, | |
| "completions/min_terminated_length": 7.333333333333333, | |
| "epoch": 2.7258979206049148, | |
| "grad_norm": 0.20546282827854156, | |
| "kl": 0.283447265625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0403, | |
| "num_tokens": 47955752.0, | |
| "reward": 1.1857277949651082, | |
| "reward_std": 1.1706757545471191, | |
| "rewards/get_embedding_sim/mean": 0.513852725426356, | |
| "rewards/get_embedding_sim/std": 0.12434107561906178, | |
| "rewards/reward_num_unique_chars/mean": 0.23464342455069223, | |
| "rewards/reward_num_unique_chars/std": 0.42252803842226666, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01822916666666663, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 902.6666666666666, | |
| "completions/mean_length": 130.4401067097982, | |
| "completions/mean_terminated_length": 113.8047103881836, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 2.7712665406427224, | |
| "grad_norm": 0.08767585456371307, | |
| "kl": 0.4050394694010417, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0431, | |
| "num_tokens": 48670211.0, | |
| "reward": 1.2997503280639648, | |
| "reward_std": 1.2144495646158855, | |
| "rewards/get_embedding_sim/mean": 0.5237086117267609, | |
| "rewards/get_embedding_sim/std": 0.1108636533220609, | |
| "rewards/reward_num_unique_chars/mean": 0.2586805572112401, | |
| "rewards/reward_num_unique_chars/std": 0.4370884597301483, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.023437500000000038, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 890.0, | |
| "completions/mean_length": 135.30035146077475, | |
| "completions/mean_terminated_length": 113.9994608561198, | |
| "completions/min_length": 5.666666666666667, | |
| "completions/min_terminated_length": 5.666666666666667, | |
| "epoch": 2.816635160680529, | |
| "grad_norm": 0.3577604591846466, | |
| "kl": 0.2592061360677083, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0498, | |
| "num_tokens": 49402749.0, | |
| "reward": 1.2939318418502808, | |
| "reward_std": 1.2120266358057659, | |
| "rewards/get_embedding_sim/mean": 0.5204942027727762, | |
| "rewards/get_embedding_sim/std": 0.11977454274892807, | |
| "rewards/reward_num_unique_chars/mean": 0.2582635283470154, | |
| "rewards/reward_num_unique_chars/std": 0.43749914566675824, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.013888888888888876, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 812.0, | |
| "completions/mean_length": 109.67014058430989, | |
| "completions/mean_terminated_length": 96.77264912923177, | |
| "completions/min_length": 6.666666666666667, | |
| "completions/min_terminated_length": 6.666666666666667, | |
| "epoch": 2.8620037807183367, | |
| "grad_norm": 0.1368367075920105, | |
| "kl": 0.3499857584635417, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0381, | |
| "num_tokens": 50108497.0, | |
| "reward": 1.5523497263590496, | |
| "reward_std": 1.214170217514038, | |
| "rewards/get_embedding_sim/mean": 0.534120500087738, | |
| "rewards/get_embedding_sim/std": 0.10954815397659938, | |
| "rewards/reward_num_unique_chars/mean": 0.3397156894207001, | |
| "rewards/reward_num_unique_chars/std": 0.46985835830370587, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01909722222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 838.6666666666666, | |
| "completions/mean_length": 125.33333841959636, | |
| "completions/mean_terminated_length": 107.79783884684245, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 2.9073724007561434, | |
| "grad_norm": 1.6821552515029907, | |
| "kl": 0.4345550537109375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0423, | |
| "num_tokens": 50831329.0, | |
| "reward": 1.3425734440485637, | |
| "reward_std": 1.191293756167094, | |
| "rewards/get_embedding_sim/mean": 0.5404900709788004, | |
| "rewards/get_embedding_sim/std": 0.11695743352174759, | |
| "rewards/reward_num_unique_chars/mean": 0.26780080795288086, | |
| "rewards/reward_num_unique_chars/std": 0.44234869877497357, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.009548611111111124, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 820.0, | |
| "completions/mean_length": 102.75347646077473, | |
| "completions/mean_terminated_length": 93.89341735839844, | |
| "completions/min_length": 8.666666666666666, | |
| "completions/min_terminated_length": 8.666666666666666, | |
| "epoch": 2.952741020793951, | |
| "grad_norm": 0.11723087728023529, | |
| "kl": 0.3047281901041667, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0332, | |
| "num_tokens": 51524645.0, | |
| "reward": 1.6216003100077312, | |
| "reward_std": 1.1864676475524902, | |
| "rewards/get_embedding_sim/mean": 0.5434751510620117, | |
| "rewards/get_embedding_sim/std": 0.12491280088822047, | |
| "rewards/reward_num_unique_chars/mean": 0.359375, | |
| "rewards/reward_num_unique_chars/std": 0.4740845561027527, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01848659003831421, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 832.0, | |
| "completions/mean_length": 131.5273691813151, | |
| "completions/mean_terminated_length": 114.69828796386719, | |
| "completions/min_length": 6.666666666666667, | |
| "completions/min_terminated_length": 6.666666666666667, | |
| "epoch": 2.998109640831758, | |
| "grad_norm": 0.10458555072546005, | |
| "kl": 0.2809855143229167, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0481, | |
| "num_tokens": 52251674.0, | |
| "reward": 1.4293763637542725, | |
| "reward_std": 1.2686160405476887, | |
| "rewards/get_embedding_sim/mean": 0.5335429906845093, | |
| "rewards/get_embedding_sim/std": 0.11212129394213359, | |
| "rewards/reward_num_unique_chars/mean": 0.29888081053892773, | |
| "rewards/reward_num_unique_chars/std": 0.4557340343793233, | |
| "step": 198 | |
| } | |
| ], | |
| "logging_steps": 3, | |
| "max_steps": 198, | |
| "num_input_tokens_seen": 52251674, | |
| "num_train_epochs": 3, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |