| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.998109640831758, | |
| "eval_steps": 51, | |
| "global_step": 198, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15104166666666666, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 975.3333333333334, | |
| "completions/mean_length": 312.0329996744792, | |
| "completions/mean_terminated_length": 185.89303080240884, | |
| "completions/min_length": 28.333333333333332, | |
| "completions/min_terminated_length": 28.333333333333332, | |
| "epoch": 0.045368620037807186, | |
| "grad_norm": 0.14972379803657532, | |
| "kl": 4.560748736063639e-05, | |
| "learning_rate": 4e-07, | |
| "loss": -0.0081, | |
| "num_tokens": 942182.0, | |
| "reward": 0.37008477250734967, | |
| "reward_std": 0.11998833467562993, | |
| "rewards/get_embedding_sim/mean": 0.3440430959065755, | |
| "rewards/get_embedding_sim/std": 0.06710867583751678, | |
| "rewards/reward_num_unique_chars/mean": 0.026041666666666668, | |
| "rewards/reward_num_unique_chars/std": 0.14761295169591904, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.13020833333333334, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 959.0, | |
| "completions/mean_length": 307.0069580078125, | |
| "completions/mean_terminated_length": 199.09521484375, | |
| "completions/min_length": 10.333333333333334, | |
| "completions/min_terminated_length": 10.333333333333334, | |
| "epoch": 0.09073724007561437, | |
| "grad_norm": 0.12008437514305115, | |
| "kl": 0.0001388813058535258, | |
| "learning_rate": 1e-06, | |
| "loss": 0.035, | |
| "num_tokens": 1882942.0, | |
| "reward": 0.4796616733074188, | |
| "reward_std": 0.214401513338089, | |
| "rewards/get_embedding_sim/mean": 0.3694185713926951, | |
| "rewards/get_embedding_sim/std": 0.07585694640874863, | |
| "rewards/reward_num_unique_chars/mean": 0.1102430559694767, | |
| "rewards/reward_num_unique_chars/std": 0.2982482860485713, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08072916666666667, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 845.0, | |
| "completions/mean_length": 234.67969258626303, | |
| "completions/mean_terminated_length": 166.36500040690103, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "epoch": 0.13610586011342155, | |
| "grad_norm": 0.08606597781181335, | |
| "kl": 0.00013801626240213713, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0172, | |
| "num_tokens": 2735293.0, | |
| "reward": 0.39071526130040485, | |
| "reward_std": 0.1662569542725881, | |
| "rewards/get_embedding_sim/mean": 0.33168746034304303, | |
| "rewards/get_embedding_sim/std": 0.07500659177700679, | |
| "rewards/reward_num_unique_chars/mean": 0.059027779226501785, | |
| "rewards/reward_num_unique_chars/std": 0.22509141763051352, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.10503472222222225, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 983.3333333333334, | |
| "completions/mean_length": 262.79688517252606, | |
| "completions/mean_terminated_length": 173.54302469889322, | |
| "completions/min_length": 12.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 0.18147448015122875, | |
| "grad_norm": 0.11949238181114197, | |
| "kl": 0.00030877192815144855, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0001, | |
| "num_tokens": 3627859.0, | |
| "reward": 0.4095470607280731, | |
| "reward_std": 0.18979967882235846, | |
| "rewards/get_embedding_sim/mean": 0.33055397868156433, | |
| "rewards/get_embedding_sim/std": 0.07462155818939209, | |
| "rewards/reward_num_unique_chars/mean": 0.07899305472771327, | |
| "rewards/reward_num_unique_chars/std": 0.25569593409697217, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1362847222222222, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 991.6666666666666, | |
| "completions/mean_length": 316.6762288411458, | |
| "completions/mean_terminated_length": 204.85944112141928, | |
| "completions/min_length": 9.666666666666666, | |
| "completions/min_terminated_length": 9.666666666666666, | |
| "epoch": 0.22684310018903592, | |
| "grad_norm": 0.16435399651527405, | |
| "kl": 0.0005876521269480387, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0529, | |
| "num_tokens": 4554894.0, | |
| "reward": 0.4522427221139272, | |
| "reward_std": 0.205996572971344, | |
| "rewards/get_embedding_sim/mean": 0.35502047340075177, | |
| "rewards/get_embedding_sim/std": 0.076506607234478, | |
| "rewards/reward_num_unique_chars/mean": 0.09722222139437993, | |
| "rewards/reward_num_unique_chars/std": 0.27809616923332214, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.11718750000000004, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 927.0, | |
| "completions/mean_length": 283.77171834309894, | |
| "completions/mean_terminated_length": 184.45149739583334, | |
| "completions/min_length": 24.666666666666668, | |
| "completions/min_terminated_length": 24.666666666666668, | |
| "epoch": 0.2722117202268431, | |
| "grad_norm": 0.17904439568519592, | |
| "kl": 0.0004306634267171224, | |
| "learning_rate": 1e-06, | |
| "loss": 0.036, | |
| "num_tokens": 5464567.0, | |
| "reward": 0.47324784596761066, | |
| "reward_std": 0.2480545292297999, | |
| "rewards/get_embedding_sim/mean": 0.35345616936683655, | |
| "rewards/get_embedding_sim/std": 0.08570993691682816, | |
| "rewards/reward_num_unique_chars/mean": 0.11979166915019353, | |
| "rewards/reward_num_unique_chars/std": 0.32309961318969727, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.07204861111111112, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1011.0, | |
| "completions/mean_length": 230.54688517252603, | |
| "completions/mean_terminated_length": 169.21332804361978, | |
| "completions/min_length": 12.333333333333334, | |
| "completions/min_terminated_length": 12.333333333333334, | |
| "epoch": 0.31758034026465026, | |
| "grad_norm": 0.11123450100421906, | |
| "kl": 0.0011239051818847656, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0222, | |
| "num_tokens": 6313117.0, | |
| "reward": 0.4715224802494049, | |
| "reward_std": 0.2366275986035665, | |
| "rewards/get_embedding_sim/mean": 0.3491266171137492, | |
| "rewards/get_embedding_sim/std": 0.06465367351969083, | |
| "rewards/reward_num_unique_chars/mean": 0.1223958358168602, | |
| "rewards/reward_num_unique_chars/std": 0.3250391185283661, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09895833333333337, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 961.0, | |
| "completions/mean_length": 229.36719258626303, | |
| "completions/mean_terminated_length": 142.55723571777344, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "epoch": 0.3629489603024575, | |
| "grad_norm": 0.118320994079113, | |
| "kl": 0.0019257068634033203, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0158, | |
| "num_tokens": 7162132.0, | |
| "reward": 0.5189645787080129, | |
| "reward_std": 0.24159842729568481, | |
| "rewards/get_embedding_sim/mean": 0.3809437155723572, | |
| "rewards/get_embedding_sim/std": 0.0799456536769867, | |
| "rewards/reward_num_unique_chars/mean": 0.13802083084980646, | |
| "rewards/reward_num_unique_chars/std": 0.3419287900129954, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.052951388888888874, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 982.3333333333334, | |
| "completions/mean_length": 197.44445292154947, | |
| "completions/mean_terminated_length": 151.25631205240884, | |
| "completions/min_length": 14.666666666666666, | |
| "completions/min_terminated_length": 14.666666666666666, | |
| "epoch": 0.40831758034026466, | |
| "grad_norm": 0.11851406842470169, | |
| "kl": 0.002936681111653646, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0317, | |
| "num_tokens": 7973172.0, | |
| "reward": 0.569815476735433, | |
| "reward_std": 0.25512967507044476, | |
| "rewards/get_embedding_sim/mean": 0.362350195646286, | |
| "rewards/get_embedding_sim/std": 0.07909337679545085, | |
| "rewards/reward_num_unique_chars/mean": 0.2074652761220932, | |
| "rewards/reward_num_unique_chars/std": 0.4044720729192098, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.056423611111111126, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 934.3333333333334, | |
| "completions/mean_length": 211.5963592529297, | |
| "completions/mean_terminated_length": 163.11248270670572, | |
| "completions/min_length": 16.333333333333332, | |
| "completions/min_terminated_length": 16.333333333333332, | |
| "epoch": 0.45368620037807184, | |
| "grad_norm": 0.21573348343372345, | |
| "kl": 0.008742332458496094, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0125, | |
| "num_tokens": 8794371.0, | |
| "reward": 0.43826034665107727, | |
| "reward_std": 0.20837691922982535, | |
| "rewards/get_embedding_sim/mean": 0.3427741924921672, | |
| "rewards/get_embedding_sim/std": 0.0719177375237147, | |
| "rewards/reward_num_unique_chars/mean": 0.09548610945542653, | |
| "rewards/reward_num_unique_chars/std": 0.2681623448928197, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.056423611111111126, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 892.0, | |
| "completions/mean_length": 197.90365091959634, | |
| "completions/mean_terminated_length": 148.50442504882812, | |
| "completions/min_length": 15.0, | |
| "completions/min_terminated_length": 15.0, | |
| "epoch": 0.499054820415879, | |
| "grad_norm": 0.08199404180049896, | |
| "kl": 0.005775133768717448, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0113, | |
| "num_tokens": 9601812.0, | |
| "reward": 0.45480871200561523, | |
| "reward_std": 0.2194500764211019, | |
| "rewards/get_embedding_sim/mean": 0.36192673444747925, | |
| "rewards/get_embedding_sim/std": 0.0750991627573967, | |
| "rewards/reward_num_unique_chars/mean": 0.0928819440305233, | |
| "rewards/reward_num_unique_chars/std": 0.2857237259546916, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.053819444444444454, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 873.3333333333334, | |
| "completions/mean_length": 201.65104166666666, | |
| "completions/mean_terminated_length": 154.5564727783203, | |
| "completions/min_length": 10.666666666666666, | |
| "completions/min_terminated_length": 10.666666666666666, | |
| "epoch": 0.5444234404536862, | |
| "grad_norm": 0.13542793691158295, | |
| "kl": 0.011366526285807291, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0008, | |
| "num_tokens": 10414722.0, | |
| "reward": 0.4134095311164856, | |
| "reward_std": 0.16343241184949875, | |
| "rewards/get_embedding_sim/mean": 0.3708748022715251, | |
| "rewards/get_embedding_sim/std": 0.08833041042089462, | |
| "rewards/reward_num_unique_chars/mean": 0.042534722636143364, | |
| "rewards/reward_num_unique_chars/std": 0.1979833443959554, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.04340277777777779, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 891.6666666666666, | |
| "completions/mean_length": 176.1076456705729, | |
| "completions/mean_terminated_length": 137.35225423177084, | |
| "completions/min_length": 11.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.5897920604914934, | |
| "grad_norm": 1.7642544507980347, | |
| "kl": 0.151151974995931, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0179, | |
| "num_tokens": 11207422.0, | |
| "reward": 0.5713514387607574, | |
| "reward_std": 0.26335498690605164, | |
| "rewards/get_embedding_sim/mean": 0.36909447113672894, | |
| "rewards/get_embedding_sim/std": 0.09187572946151097, | |
| "rewards/reward_num_unique_chars/mean": 0.202256940305233, | |
| "rewards/reward_num_unique_chars/std": 0.390445997317632, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.032986111111111126, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 826.3333333333334, | |
| "completions/mean_length": 201.41754150390625, | |
| "completions/mean_terminated_length": 173.10018412272134, | |
| "completions/min_length": 18.333333333333332, | |
| "completions/min_terminated_length": 18.333333333333332, | |
| "epoch": 0.6351606805293005, | |
| "grad_norm": 0.10251538455486298, | |
| "kl": 0.014621734619140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0102, | |
| "num_tokens": 12029279.0, | |
| "reward": 0.5142592787742615, | |
| "reward_std": 0.2620675365130107, | |
| "rewards/get_embedding_sim/mean": 0.3701620002587636, | |
| "rewards/get_embedding_sim/std": 0.10092929750680923, | |
| "rewards/reward_num_unique_chars/mean": 0.1440972238779068, | |
| "rewards/reward_num_unique_chars/std": 0.34582529465357464, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.026041666666666668, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 927.6666666666666, | |
| "completions/mean_length": 183.04254150390625, | |
| "completions/mean_terminated_length": 160.71800740559897, | |
| "completions/min_length": 14.666666666666666, | |
| "completions/min_terminated_length": 14.666666666666666, | |
| "epoch": 0.6805293005671077, | |
| "grad_norm": 0.09084329754114151, | |
| "kl": 0.015349706013997396, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0004, | |
| "num_tokens": 12816000.0, | |
| "reward": 0.5384640991687775, | |
| "reward_std": 0.22944432497024536, | |
| "rewards/get_embedding_sim/mean": 0.39697099725405377, | |
| "rewards/get_embedding_sim/std": 0.10396929830312729, | |
| "rewards/reward_num_unique_chars/mean": 0.14149305721124014, | |
| "rewards/reward_num_unique_chars/std": 0.3254843403895696, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02777777777777779, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 802.3333333333334, | |
| "completions/mean_length": 165.0295155843099, | |
| "completions/mean_terminated_length": 140.43072509765625, | |
| "completions/min_length": 9.666666666666666, | |
| "completions/min_terminated_length": 9.666666666666666, | |
| "epoch": 0.725897920604915, | |
| "grad_norm": 0.21910759806632996, | |
| "kl": 0.027149200439453125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 13587394.0, | |
| "reward": 0.5553397635618845, | |
| "reward_std": 0.23784717917442322, | |
| "rewards/get_embedding_sim/mean": 0.4086383481820424, | |
| "rewards/get_embedding_sim/std": 0.10949051380157471, | |
| "rewards/reward_num_unique_chars/mean": 0.14670138930281004, | |
| "rewards/reward_num_unique_chars/std": 0.33698558807373047, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.7712665406427222, | |
| "grad_norm": 0.09893961995840073, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0047, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.7712665406427222, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.12797619047619044, | |
| "eval_completions/max_length": 880.7678571428571, | |
| "eval_completions/max_terminated_length": 701.3214285714286, | |
| "eval_completions/mean_length": 258.24070589882984, | |
| "eval_completions/mean_terminated_length": 153.6624070576259, | |
| "eval_completions/min_length": 24.446428571428573, | |
| "eval_completions/min_terminated_length": 24.446428571428573, | |
| "eval_kl": 0.0542449951171875, | |
| "eval_loss": 0.026244351640343666, | |
| "eval_num_tokens": 14351398.0, | |
| "eval_reward": 0.524820977555854, | |
| "eval_reward_std": 0.22432494928528154, | |
| "eval_rewards/get_embedding_sim/mean": 0.43479119294456076, | |
| "eval_rewards/get_embedding_sim/std": 0.09110667330345937, | |
| "eval_rewards/reward_num_unique_chars/mean": 0.09002976235933602, | |
| "eval_rewards/reward_num_unique_chars/std": 0.18600706889161042, | |
| "eval_runtime": 2254.2404, | |
| "eval_samples_per_second": 0.025, | |
| "eval_steps_per_second": 0.001, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.044270833333333315, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 921.3333333333334, | |
| "completions/mean_length": 197.77517954508463, | |
| "completions/mean_terminated_length": 159.97277196248373, | |
| "completions/min_length": 9.833333333333334, | |
| "completions/min_terminated_length": 9.833333333333334, | |
| "epoch": 0.8166351606805293, | |
| "grad_norm": 0.08635270595550537, | |
| "kl": 0.030397415161132812, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0077, | |
| "num_tokens": 15200636.0, | |
| "reward": 0.5215439548095068, | |
| "reward_std": 0.23126975446939468, | |
| "rewards/get_embedding_sim/mean": 0.42692585786183673, | |
| "rewards/get_embedding_sim/std": 0.11467409133911133, | |
| "rewards/reward_num_unique_chars/mean": 0.09461805845300357, | |
| "rewards/reward_num_unique_chars/std": 0.28477593511343, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.04253472222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 836.6666666666666, | |
| "completions/mean_length": 198.4244842529297, | |
| "completions/mean_terminated_length": 161.79749043782553, | |
| "completions/min_length": 10.333333333333334, | |
| "completions/min_terminated_length": 10.333333333333334, | |
| "epoch": 0.8620037807183365, | |
| "grad_norm": 14.726771354675293, | |
| "kl": 0.21588261922200522, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0104, | |
| "num_tokens": 16019045.0, | |
| "reward": 0.5494122306505839, | |
| "reward_std": 0.24494746327400208, | |
| "rewards/get_embedding_sim/mean": 0.44264134764671326, | |
| "rewards/get_embedding_sim/std": 0.11085022240877151, | |
| "rewards/reward_num_unique_chars/mean": 0.10677083333333333, | |
| "rewards/reward_num_unique_chars/std": 0.30227985978126526, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.04253472222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 964.6666666666666, | |
| "completions/mean_length": 200.72309366861978, | |
| "completions/mean_terminated_length": 164.248779296875, | |
| "completions/min_length": 11.333333333333334, | |
| "completions/min_terminated_length": 11.333333333333334, | |
| "epoch": 0.9073724007561437, | |
| "grad_norm": 0.09581304341554642, | |
| "kl": 0.33023325602213544, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0072, | |
| "num_tokens": 16832758.0, | |
| "reward": 0.599389910697937, | |
| "reward_std": 0.26327316959698993, | |
| "rewards/get_embedding_sim/mean": 0.45268850525220233, | |
| "rewards/get_embedding_sim/std": 0.11441038797299068, | |
| "rewards/reward_num_unique_chars/mean": 0.14670139302810034, | |
| "rewards/reward_num_unique_chars/std": 0.31440146267414093, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.021701388888888878, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 917.3333333333334, | |
| "completions/mean_length": 186.21094258626303, | |
| "completions/mean_terminated_length": 167.5730946858724, | |
| "completions/min_length": 13.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.9527410207939508, | |
| "grad_norm": 0.08248484879732132, | |
| "kl": 0.04541015625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0017, | |
| "num_tokens": 17637097.0, | |
| "reward": 0.5855847001075745, | |
| "reward_std": 0.2750825683275859, | |
| "rewards/get_embedding_sim/mean": 0.46405691901842755, | |
| "rewards/get_embedding_sim/std": 0.11442819982767105, | |
| "rewards/reward_num_unique_chars/mean": 0.12152778108914693, | |
| "rewards/reward_num_unique_chars/std": 0.3193853000799815, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.031507423371647504, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 930.0, | |
| "completions/mean_length": 193.8086140950521, | |
| "completions/mean_terminated_length": 167.16290283203125, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 0.998109640831758, | |
| "grad_norm": 0.06374574452638626, | |
| "kl": 0.03699493408203125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0187, | |
| "num_tokens": 18440914.0, | |
| "reward": 0.6297420461972555, | |
| "reward_std": 0.2834969659646352, | |
| "rewards/get_embedding_sim/mean": 0.47088783979415894, | |
| "rewards/get_embedding_sim/std": 0.11324869592984517, | |
| "rewards/reward_num_unique_chars/mean": 0.1588541641831398, | |
| "rewards/reward_num_unique_chars/std": 0.36384791135787964, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.032118055555555546, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 833.3333333333334, | |
| "completions/mean_length": 200.6024373372396, | |
| "completions/mean_terminated_length": 173.2165069580078, | |
| "completions/min_length": 9.0, | |
| "completions/min_terminated_length": 9.0, | |
| "epoch": 1.0453686200378072, | |
| "grad_norm": 0.11849670857191086, | |
| "kl": 0.05316925048828125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0071, | |
| "num_tokens": 19261832.0, | |
| "reward": 0.5802033940951029, | |
| "reward_std": 0.25838569800059, | |
| "rewards/get_embedding_sim/mean": 0.4734325309594472, | |
| "rewards/get_embedding_sim/std": 0.11253533015648524, | |
| "rewards/reward_num_unique_chars/mean": 0.10677083084980647, | |
| "rewards/reward_num_unique_chars/std": 0.30244183043638867, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.039930555555555546, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 973.0, | |
| "completions/mean_length": 199.55555725097656, | |
| "completions/mean_terminated_length": 165.2902577718099, | |
| "completions/min_length": 9.333333333333334, | |
| "completions/min_terminated_length": 9.333333333333334, | |
| "epoch": 1.0907372400756143, | |
| "grad_norm": 0.10332732647657394, | |
| "kl": 0.0515289306640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0075, | |
| "num_tokens": 20067096.0, | |
| "reward": 0.625789741675059, | |
| "reward_std": 0.2765499949455261, | |
| "rewards/get_embedding_sim/mean": 0.49471331636110943, | |
| "rewards/get_embedding_sim/std": 0.11266019940376282, | |
| "rewards/reward_num_unique_chars/mean": 0.1310763880610466, | |
| "rewards/reward_num_unique_chars/std": 0.336679349342982, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.036458333333333294, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 1008.6666666666666, | |
| "completions/mean_length": 209.8359375, | |
| "completions/mean_terminated_length": 178.97360229492188, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 1.1361058601134215, | |
| "grad_norm": 0.11925654858350754, | |
| "kl": 0.14461263020833334, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0055, | |
| "num_tokens": 20893467.0, | |
| "reward": 0.5831413467725118, | |
| "reward_std": 0.2582869480053584, | |
| "rewards/get_embedding_sim/mean": 0.4919954836368561, | |
| "rewards/get_embedding_sim/std": 0.1114387462536494, | |
| "rewards/reward_num_unique_chars/mean": 0.09114583333333333, | |
| "rewards/reward_num_unique_chars/std": 0.2839343051115672, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.047743055555555546, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 897.3333333333334, | |
| "completions/mean_length": 221.0104217529297, | |
| "completions/mean_terminated_length": 180.71256510416666, | |
| "completions/min_length": 5.0, | |
| "completions/min_terminated_length": 5.0, | |
| "epoch": 1.1814744801512287, | |
| "grad_norm": 0.09554021060466766, | |
| "kl": 0.16336822509765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0197, | |
| "num_tokens": 21730887.0, | |
| "reward": 0.6385945876439413, | |
| "reward_std": 0.2661168724298477, | |
| "rewards/get_embedding_sim/mean": 0.5127264857292175, | |
| "rewards/get_embedding_sim/std": 0.11183823893467586, | |
| "rewards/reward_num_unique_chars/mean": 0.12586805472771326, | |
| "rewards/reward_num_unique_chars/std": 0.3207412262757619, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.049479166666666664, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 939.0, | |
| "completions/mean_length": 228.04601033528647, | |
| "completions/mean_terminated_length": 186.1550038655599, | |
| "completions/min_length": 7.333333333333333, | |
| "completions/min_terminated_length": 7.333333333333333, | |
| "epoch": 1.2268431001890359, | |
| "grad_norm": 0.07755686342716217, | |
| "kl": 0.05751800537109375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0156, | |
| "num_tokens": 22583420.0, | |
| "reward": 0.6019672354062399, | |
| "reward_std": 0.26383428772290546, | |
| "rewards/get_embedding_sim/mean": 0.5021408100922903, | |
| "rewards/get_embedding_sim/std": 0.10627821832895279, | |
| "rewards/reward_num_unique_chars/mean": 0.09982638930281003, | |
| "rewards/reward_num_unique_chars/std": 0.2837299009164174, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.029513888888888878, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 931.3333333333334, | |
| "completions/mean_length": 191.74740091959634, | |
| "completions/mean_terminated_length": 166.4415028889974, | |
| "completions/min_length": 8.666666666666666, | |
| "completions/min_terminated_length": 8.666666666666666, | |
| "epoch": 1.272211720226843, | |
| "grad_norm": 0.08697984367609024, | |
| "kl": 0.057329813639322914, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0141, | |
| "num_tokens": 23394137.0, | |
| "reward": 0.6638144056002299, | |
| "reward_std": 0.26522762576738995, | |
| "rewards/get_embedding_sim/mean": 0.5231893658638, | |
| "rewards/get_embedding_sim/std": 0.10482257604598999, | |
| "rewards/reward_num_unique_chars/mean": 0.140625, | |
| "rewards/reward_num_unique_chars/std": 0.3480878472328186, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.026041666666666668, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 872.3333333333334, | |
| "completions/mean_length": 185.42535400390625, | |
| "completions/mean_terminated_length": 163.15696716308594, | |
| "completions/min_length": 7.333333333333333, | |
| "completions/min_terminated_length": 7.333333333333333, | |
| "epoch": 1.3175803402646502, | |
| "grad_norm": 0.14970338344573975, | |
| "kl": 0.12465922037760417, | |
| "learning_rate": 1e-06, | |
| "loss": 0.008, | |
| "num_tokens": 24197571.0, | |
| "reward": 0.6190575559933981, | |
| "reward_std": 0.2601381540298462, | |
| "rewards/get_embedding_sim/mean": 0.5174950361251831, | |
| "rewards/get_embedding_sim/std": 0.0997606838742892, | |
| "rewards/reward_num_unique_chars/mean": 0.10156250124176343, | |
| "rewards/reward_num_unique_chars/std": 0.297150323788325, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.029513888888888912, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 965.6666666666666, | |
| "completions/mean_length": 216.75694783528647, | |
| "completions/mean_terminated_length": 192.3217315673828, | |
| "completions/min_length": 7.333333333333333, | |
| "completions/min_terminated_length": 7.333333333333333, | |
| "epoch": 1.3629489603024574, | |
| "grad_norm": 0.11725780367851257, | |
| "kl": 0.08345540364583333, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0079, | |
| "num_tokens": 25034555.0, | |
| "reward": 0.5995156168937683, | |
| "reward_std": 0.22840352356433868, | |
| "rewards/get_embedding_sim/mean": 0.5118419329325358, | |
| "rewards/get_embedding_sim/std": 0.0987908939520518, | |
| "rewards/reward_num_unique_chars/mean": 0.0876736119389534, | |
| "rewards/reward_num_unique_chars/std": 0.27722589671611786, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.029513888888888878, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 884.3333333333334, | |
| "completions/mean_length": 186.4375, | |
| "completions/mean_terminated_length": 161.33899434407553, | |
| "completions/min_length": 6.0, | |
| "completions/min_terminated_length": 6.0, | |
| "epoch": 1.4083175803402646, | |
| "grad_norm": 0.10394510626792908, | |
| "kl": 0.07037099202473958, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0084, | |
| "num_tokens": 25831283.0, | |
| "reward": 0.6795124411582947, | |
| "reward_std": 0.29141750435034436, | |
| "rewards/get_embedding_sim/mean": 0.5137137969334921, | |
| "rewards/get_embedding_sim/std": 0.09767910589774449, | |
| "rewards/reward_num_unique_chars/mean": 0.16579860697189966, | |
| "rewards/reward_num_unique_chars/std": 0.3542452355225881, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.05034722222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 971.3333333333334, | |
| "completions/mean_length": 212.23785400390625, | |
| "completions/mean_terminated_length": 169.16080729166666, | |
| "completions/min_length": 6.666666666666667, | |
| "completions/min_terminated_length": 6.666666666666667, | |
| "epoch": 1.4536862003780717, | |
| "grad_norm": 0.10010381788015366, | |
| "kl": 0.06960042317708333, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0203, | |
| "num_tokens": 26656485.0, | |
| "reward": 0.6300086975097656, | |
| "reward_std": 0.24619843065738678, | |
| "rewards/get_embedding_sim/mean": 0.5267100731531779, | |
| "rewards/get_embedding_sim/std": 0.1071697548031807, | |
| "rewards/reward_num_unique_chars/mean": 0.1032986119389534, | |
| "rewards/reward_num_unique_chars/std": 0.29826584458351135, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.037326388888888874, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 919.0, | |
| "completions/mean_length": 211.8107655843099, | |
| "completions/mean_terminated_length": 180.1980183919271, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 1.499054820415879, | |
| "grad_norm": 0.07485458254814148, | |
| "kl": 0.061063130696614586, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0248, | |
| "num_tokens": 27490315.0, | |
| "reward": 0.620047926902771, | |
| "reward_std": 0.2632503807544708, | |
| "rewards/get_embedding_sim/mean": 0.5132770538330078, | |
| "rewards/get_embedding_sim/std": 0.10026986648639043, | |
| "rewards/reward_num_unique_chars/mean": 0.10677083333333333, | |
| "rewards/reward_num_unique_chars/std": 0.3043619990348816, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.544423440453686, | |
| "grad_norm": 0.11106861382722855, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0115, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.544423440453686, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.07068452380952381, | |
| "eval_completions/max_length": 887.5892857142857, | |
| "eval_completions/max_terminated_length": 675.0892857142857, | |
| "eval_completions/mean_length": 206.3244113922119, | |
| "eval_completions/mean_terminated_length": 145.37539066587175, | |
| "eval_completions/min_length": 18.160714285714285, | |
| "eval_completions/min_terminated_length": 18.160714285714285, | |
| "eval_kl": 0.06965419224330358, | |
| "eval_loss": 0.03773626312613487, | |
| "eval_num_tokens": 28307736.0, | |
| "eval_reward": 0.6229457370936871, | |
| "eval_reward_std": 0.2839882879384926, | |
| "eval_rewards/get_embedding_sim/mean": 0.5206391582531589, | |
| "eval_rewards/get_embedding_sim/std": 0.09148550758670483, | |
| "eval_rewards/reward_num_unique_chars/mean": 0.10230654794057566, | |
| "eval_rewards/reward_num_unique_chars/std": 0.24572753932859218, | |
| "eval_runtime": 1726.6979, | |
| "eval_samples_per_second": 0.032, | |
| "eval_steps_per_second": 0.001, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03602430555555556, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 990.3333333333334, | |
| "completions/mean_length": 214.1545181274414, | |
| "completions/mean_terminated_length": 183.94319661458334, | |
| "completions/min_length": 6.5, | |
| "completions/min_terminated_length": 6.5, | |
| "epoch": 1.5897920604914932, | |
| "grad_norm": 1.0838171243667603, | |
| "kl": 0.0672899881998698, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0253, | |
| "num_tokens": 29138511.0, | |
| "reward": 0.6624543964862823, | |
| "reward_std": 0.26948046932617825, | |
| "rewards/get_embedding_sim/mean": 0.5296418766180674, | |
| "rewards/get_embedding_sim/std": 0.10213356713453929, | |
| "rewards/reward_num_unique_chars/mean": 0.1328124993791183, | |
| "rewards/reward_num_unique_chars/std": 0.32840434461832047, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.026041666666666668, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 974.0, | |
| "completions/mean_length": 201.72048950195312, | |
| "completions/mean_terminated_length": 179.72496032714844, | |
| "completions/min_length": 7.333333333333333, | |
| "completions/min_terminated_length": 7.333333333333333, | |
| "epoch": 1.6351606805293004, | |
| "grad_norm": 0.0918864831328392, | |
| "kl": 0.061335245768229164, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0208, | |
| "num_tokens": 29960717.0, | |
| "reward": 0.6120087305704752, | |
| "reward_std": 0.250284880399704, | |
| "rewards/get_embedding_sim/mean": 0.5364878376324972, | |
| "rewards/get_embedding_sim/std": 0.0979540745417277, | |
| "rewards/reward_num_unique_chars/mean": 0.07552083457509677, | |
| "rewards/reward_num_unique_chars/std": 0.26320414741834003, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03559027777777779, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 923.0, | |
| "completions/mean_length": 206.97309366861978, | |
| "completions/mean_terminated_length": 177.1915079752604, | |
| "completions/min_length": 7.666666666666667, | |
| "completions/min_terminated_length": 7.666666666666667, | |
| "epoch": 1.6805293005671076, | |
| "grad_norm": 0.07678642123937607, | |
| "kl": 0.06285349527994792, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0214, | |
| "num_tokens": 30781870.0, | |
| "reward": 0.6274827718734741, | |
| "reward_std": 0.26556732257207233, | |
| "rewards/get_embedding_sim/mean": 0.5155035257339478, | |
| "rewards/get_embedding_sim/std": 0.09278701990842819, | |
| "rewards/reward_num_unique_chars/mean": 0.11197916666666667, | |
| "rewards/reward_num_unique_chars/std": 0.30655037860075635, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.032118055555555546, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 881.3333333333334, | |
| "completions/mean_length": 185.73351542154947, | |
| "completions/mean_terminated_length": 157.97284952799478, | |
| "completions/min_length": 8.333333333333334, | |
| "completions/min_terminated_length": 8.333333333333334, | |
| "epoch": 1.725897920604915, | |
| "grad_norm": 0.07077532261610031, | |
| "kl": 0.07100423177083333, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0255, | |
| "num_tokens": 31578315.0, | |
| "reward": 0.6285200913747152, | |
| "reward_std": 0.2933768729368846, | |
| "rewards/get_embedding_sim/mean": 0.5260895093282064, | |
| "rewards/get_embedding_sim/std": 0.10419273873170216, | |
| "rewards/reward_num_unique_chars/mean": 0.10243055472771327, | |
| "rewards/reward_num_unique_chars/std": 0.30191460251808167, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.035590277777777755, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 998.3333333333334, | |
| "completions/mean_length": 197.5555623372396, | |
| "completions/mean_terminated_length": 167.07290649414062, | |
| "completions/min_length": 7.333333333333333, | |
| "completions/min_terminated_length": 7.333333333333333, | |
| "epoch": 1.7712665406427222, | |
| "grad_norm": 0.07132314145565033, | |
| "kl": 0.07155863444010417, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0289, | |
| "num_tokens": 32370091.0, | |
| "reward": 0.6605067054430643, | |
| "reward_std": 0.3198150396347046, | |
| "rewards/get_embedding_sim/mean": 0.5276941855748495, | |
| "rewards/get_embedding_sim/std": 0.09764280170202255, | |
| "rewards/reward_num_unique_chars/mean": 0.13281250248352686, | |
| "rewards/reward_num_unique_chars/std": 0.3356940845648448, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03819444444444442, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 925.3333333333334, | |
| "completions/mean_length": 205.5260467529297, | |
| "completions/mean_terminated_length": 172.99127197265625, | |
| "completions/min_length": 7.333333333333333, | |
| "completions/min_terminated_length": 7.333333333333333, | |
| "epoch": 1.8166351606805293, | |
| "grad_norm": 0.07695771753787994, | |
| "kl": 0.079559326171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0285, | |
| "num_tokens": 33183529.0, | |
| "reward": 0.6505021651585897, | |
| "reward_std": 0.28806476791699726, | |
| "rewards/get_embedding_sim/mean": 0.5255021254221598, | |
| "rewards/get_embedding_sim/std": 0.10448584208885829, | |
| "rewards/reward_num_unique_chars/mean": 0.12499999751647313, | |
| "rewards/reward_num_unique_chars/std": 0.3297826250394185, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 956.0, | |
| "completions/mean_length": 187.78907267252603, | |
| "completions/mean_terminated_length": 160.96214803059897, | |
| "completions/min_length": 7.333333333333333, | |
| "completions/min_terminated_length": 7.333333333333333, | |
| "epoch": 1.8620037807183365, | |
| "grad_norm": 0.10003960132598877, | |
| "kl": 0.10397847493489583, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0332, | |
| "num_tokens": 33979270.0, | |
| "reward": 0.702047864596049, | |
| "reward_std": 0.2998199959595998, | |
| "rewards/get_embedding_sim/mean": 0.5353811780611674, | |
| "rewards/get_embedding_sim/std": 0.1009945347905159, | |
| "rewards/reward_num_unique_chars/mean": 0.16666666915019354, | |
| "rewards/reward_num_unique_chars/std": 0.3635033369064331, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0390625, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 961.0, | |
| "completions/mean_length": 201.6701456705729, | |
| "completions/mean_terminated_length": 168.43896484375, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 1.9073724007561437, | |
| "grad_norm": 0.1418294459581375, | |
| "kl": 0.08981831868489583, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0311, | |
| "num_tokens": 34790042.0, | |
| "reward": 0.6395866274833679, | |
| "reward_std": 0.278631071249644, | |
| "rewards/get_embedding_sim/mean": 0.5371560255686442, | |
| "rewards/get_embedding_sim/std": 0.10253078490495682, | |
| "rewards/reward_num_unique_chars/mean": 0.10243055721124013, | |
| "rewards/reward_num_unique_chars/std": 0.3033109207948049, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.020833333333333297, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 933.6666666666666, | |
| "completions/mean_length": 165.38021341959634, | |
| "completions/mean_terminated_length": 147.11800384521484, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 1.9527410207939508, | |
| "grad_norm": 0.085059255361557, | |
| "kl": 0.09361775716145833, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0241, | |
| "num_tokens": 35555504.0, | |
| "reward": 0.7251607775688171, | |
| "reward_std": 0.3248043159643809, | |
| "rewards/get_embedding_sim/mean": 0.5359246134757996, | |
| "rewards/get_embedding_sim/std": 0.10831368962923686, | |
| "rewards/reward_num_unique_chars/mean": 0.18923610945542654, | |
| "rewards/reward_num_unique_chars/std": 0.38598161935806274, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.051843869731800774, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 949.6666666666666, | |
| "completions/mean_length": 218.2733408610026, | |
| "completions/mean_terminated_length": 174.35783894856772, | |
| "completions/min_length": 5.333333333333333, | |
| "completions/min_terminated_length": 5.333333333333333, | |
| "epoch": 1.998109640831758, | |
| "grad_norm": 0.15158401429653168, | |
| "kl": 0.09020487467447917, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0421, | |
| "num_tokens": 36367765.0, | |
| "reward": 0.6982676188151041, | |
| "reward_std": 0.3466052810351054, | |
| "rewards/get_embedding_sim/mean": 0.5420175790786743, | |
| "rewards/get_embedding_sim/std": 0.09902476519346237, | |
| "rewards/reward_num_unique_chars/mean": 0.15625, | |
| "rewards/reward_num_unique_chars/std": 0.3612334032853444, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03472222222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 948.0, | |
| "completions/mean_length": 176.51909891764322, | |
| "completions/mean_terminated_length": 146.023562113444, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 2.045368620037807, | |
| "grad_norm": 0.08893448859453201, | |
| "kl": 0.096466064453125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0402, | |
| "num_tokens": 37147067.0, | |
| "reward": 0.7173450986544291, | |
| "reward_std": 0.35685937603314716, | |
| "rewards/get_embedding_sim/mean": 0.5341853896776835, | |
| "rewards/get_embedding_sim/std": 0.1000617394844691, | |
| "rewards/reward_num_unique_chars/mean": 0.1831597238779068, | |
| "rewards/reward_num_unique_chars/std": 0.3842338224252065, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.041666666666666664, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 879.0, | |
| "completions/mean_length": 208.60938008626303, | |
| "completions/mean_terminated_length": 173.26571655273438, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 2.0907372400756143, | |
| "grad_norm": 0.12921324372291565, | |
| "kl": 0.09910074869791667, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0475, | |
| "num_tokens": 37974473.0, | |
| "reward": 0.672684927781423, | |
| "reward_std": 0.34854390223821, | |
| "rewards/get_embedding_sim/mean": 0.5364001393318176, | |
| "rewards/get_embedding_sim/std": 0.10567483057578404, | |
| "rewards/reward_num_unique_chars/mean": 0.13628472139437994, | |
| "rewards/reward_num_unique_chars/std": 0.34236905972162884, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.021701388888888878, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 956.0, | |
| "completions/mean_length": 167.10590616861978, | |
| "completions/mean_terminated_length": 148.0250244140625, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 2.1361058601134215, | |
| "grad_norm": 0.12040314823389053, | |
| "kl": 0.24815877278645834, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0403, | |
| "num_tokens": 38741491.0, | |
| "reward": 0.6958853205045065, | |
| "reward_std": 0.3416078786055247, | |
| "rewards/get_embedding_sim/mean": 0.5396353205045065, | |
| "rewards/get_embedding_sim/std": 0.11144034812847774, | |
| "rewards/reward_num_unique_chars/mean": 0.15625000248352686, | |
| "rewards/reward_num_unique_chars/std": 0.3600207368532817, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.032986111111111084, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 909.0, | |
| "completions/mean_length": 182.28211975097656, | |
| "completions/mean_terminated_length": 153.65155029296875, | |
| "completions/min_length": 6.0, | |
| "completions/min_terminated_length": 6.0, | |
| "epoch": 2.1814744801512287, | |
| "grad_norm": 0.08720903098583221, | |
| "kl": 0.11935933430989583, | |
| "learning_rate": 1e-06, | |
| "loss": 0.042, | |
| "num_tokens": 39531464.0, | |
| "reward": 0.7134884198506674, | |
| "reward_std": 0.36159368356068927, | |
| "rewards/get_embedding_sim/mean": 0.5424814422925314, | |
| "rewards/get_embedding_sim/std": 0.11029936373233795, | |
| "rewards/reward_num_unique_chars/mean": 0.17100694278875986, | |
| "rewards/reward_num_unique_chars/std": 0.37471526861190796, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 917.0, | |
| "completions/mean_length": 171.84375508626303, | |
| "completions/mean_terminated_length": 144.29528299967447, | |
| "completions/min_length": 8.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 2.226843100189036, | |
| "grad_norm": 0.08887135237455368, | |
| "kl": 0.118194580078125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0381, | |
| "num_tokens": 40311428.0, | |
| "reward": 0.6788019339243571, | |
| "reward_std": 0.33359630902608234, | |
| "rewards/get_embedding_sim/mean": 0.5451213518778483, | |
| "rewards/get_embedding_sim/std": 0.10193872700134914, | |
| "rewards/reward_num_unique_chars/mean": 0.133680559694767, | |
| "rewards/reward_num_unique_chars/std": 0.34027015169461566, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01909722222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 922.6666666666666, | |
| "completions/mean_length": 165.07205200195312, | |
| "completions/mean_terminated_length": 148.45321146647134, | |
| "completions/min_length": 6.0, | |
| "completions/min_terminated_length": 6.0, | |
| "epoch": 2.272211720226843, | |
| "grad_norm": 0.08689926564693451, | |
| "kl": 0.149566650390625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0324, | |
| "num_tokens": 41091415.0, | |
| "reward": 0.6884604295094808, | |
| "reward_std": 0.3447088996569316, | |
| "rewards/get_embedding_sim/mean": 0.5495714743932089, | |
| "rewards/get_embedding_sim/std": 0.10353380193312962, | |
| "rewards/reward_num_unique_chars/mean": 0.13888888557751974, | |
| "rewards/reward_num_unique_chars/std": 0.3459552029768626, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.31758034026465, | |
| "grad_norm": 0.10476606339216232, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0513, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 2.31758034026465, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.04687500000000001, | |
| "eval_completions/max_length": 856.4464285714286, | |
| "eval_completions/max_terminated_length": 614.625, | |
| "eval_completions/mean_length": 148.9296919277736, | |
| "eval_completions/mean_terminated_length": 106.54870585032872, | |
| "eval_completions/min_length": 12.107142857142858, | |
| "eval_completions/min_terminated_length": 12.107142857142858, | |
| "eval_kl": 0.15039280482700892, | |
| "eval_loss": 0.05131923779845238, | |
| "eval_num_tokens": 41858572.0, | |
| "eval_reward": 0.7319182710988181, | |
| "eval_reward_std": 0.39004063113991705, | |
| "eval_rewards/get_embedding_sim/mean": 0.5399539640971592, | |
| "eval_rewards/get_embedding_sim/std": 0.09657471527212433, | |
| "eval_rewards/reward_num_unique_chars/mean": 0.19196428627973156, | |
| "eval_rewards/reward_num_unique_chars/std": 0.34904111203338417, | |
| "eval_runtime": 1578.4274, | |
| "eval_samples_per_second": 0.035, | |
| "eval_steps_per_second": 0.001, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.022135416666666668, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 918.8333333333334, | |
| "completions/mean_length": 154.00824991861978, | |
| "completions/mean_terminated_length": 134.3066151936849, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 2.3629489603024574, | |
| "grad_norm": 0.17861098051071167, | |
| "kl": 0.1991424560546875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0446, | |
| "num_tokens": 42603290.0, | |
| "reward": 0.7928757965564728, | |
| "reward_std": 0.39940689504146576, | |
| "rewards/get_embedding_sim/mean": 0.5420077045758566, | |
| "rewards/get_embedding_sim/std": 0.10503626987338066, | |
| "rewards/reward_num_unique_chars/mean": 0.2508680547277133, | |
| "rewards/reward_num_unique_chars/std": 0.43154530723889667, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02083333333333337, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 943.6666666666666, | |
| "completions/mean_length": 149.3498331705729, | |
| "completions/mean_terminated_length": 130.67583719889322, | |
| "completions/min_length": 6.0, | |
| "completions/min_terminated_length": 6.0, | |
| "epoch": 2.4083175803402646, | |
| "grad_norm": 0.0997217446565628, | |
| "kl": 0.176727294921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0479, | |
| "num_tokens": 43362573.0, | |
| "reward": 0.7896133859952291, | |
| "reward_std": 0.37762073675791424, | |
| "rewards/get_embedding_sim/mean": 0.5604466795921326, | |
| "rewards/get_embedding_sim/std": 0.10085596889257431, | |
| "rewards/reward_num_unique_chars/mean": 0.22916666666666666, | |
| "rewards/reward_num_unique_chars/std": 0.417032649119695, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01996527777777779, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 966.0, | |
| "completions/mean_length": 133.52865091959634, | |
| "completions/mean_terminated_length": 115.48833719889323, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 2.4536862003780717, | |
| "grad_norm": 0.08974426239728928, | |
| "kl": 0.13869730631510416, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0442, | |
| "num_tokens": 44099022.0, | |
| "reward": 0.8535909652709961, | |
| "reward_std": 0.42868249615033466, | |
| "rewards/get_embedding_sim/mean": 0.5332783659299215, | |
| "rewards/get_embedding_sim/std": 0.10329846044381459, | |
| "rewards/reward_num_unique_chars/mean": 0.3203125, | |
| "rewards/reward_num_unique_chars/std": 0.45679094394048053, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.026041666666666668, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 908.0, | |
| "completions/mean_length": 141.3715337117513, | |
| "completions/mean_terminated_length": 117.76270294189453, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 2.499054820415879, | |
| "grad_norm": 0.10181669145822525, | |
| "kl": 0.23414103190104166, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0402, | |
| "num_tokens": 44851706.0, | |
| "reward": 0.8000141382217407, | |
| "reward_std": 0.3941415250301361, | |
| "rewards/get_embedding_sim/mean": 0.5456738670667013, | |
| "rewards/get_embedding_sim/std": 0.10725356390078862, | |
| "rewards/reward_num_unique_chars/mean": 0.2543402810891469, | |
| "rewards/reward_num_unique_chars/std": 0.43460813164711, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.018229166666666703, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 882.6666666666666, | |
| "completions/mean_length": 141.09028116861978, | |
| "completions/mean_terminated_length": 124.72643280029297, | |
| "completions/min_length": 6.666666666666667, | |
| "completions/min_terminated_length": 6.666666666666667, | |
| "epoch": 2.544423440453686, | |
| "grad_norm": 0.08525840193033218, | |
| "kl": 0.20921834309895834, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0455, | |
| "num_tokens": 45604066.0, | |
| "reward": 0.7868956923484802, | |
| "reward_std": 0.40804105003674823, | |
| "rewards/get_embedding_sim/mean": 0.5299512147903442, | |
| "rewards/get_embedding_sim/std": 0.10723193486531575, | |
| "rewards/reward_num_unique_chars/mean": 0.2569444378217061, | |
| "rewards/reward_num_unique_chars/std": 0.42565350731213886, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025173611111111122, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 923.3333333333334, | |
| "completions/mean_length": 138.74913533528647, | |
| "completions/mean_terminated_length": 116.1558354695638, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 2.5897920604914932, | |
| "grad_norm": 0.6628166437149048, | |
| "kl": 0.317626953125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0491, | |
| "num_tokens": 46345857.0, | |
| "reward": 0.8313470085461935, | |
| "reward_std": 0.4129582444826762, | |
| "rewards/get_embedding_sim/mean": 0.5509650309880575, | |
| "rewards/get_embedding_sim/std": 0.09154053280750911, | |
| "rewards/reward_num_unique_chars/mean": 0.2803819427887599, | |
| "rewards/reward_num_unique_chars/std": 0.4417712489763896, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01128472222222221, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 772.6666666666666, | |
| "completions/mean_length": 111.97396087646484, | |
| "completions/mean_terminated_length": 101.59329223632812, | |
| "completions/min_length": 6.666666666666667, | |
| "completions/min_terminated_length": 6.666666666666667, | |
| "epoch": 2.6351606805293004, | |
| "grad_norm": 0.09945366531610489, | |
| "kl": 0.262237548828125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.038, | |
| "num_tokens": 47054931.0, | |
| "reward": 0.8715664744377136, | |
| "reward_std": 0.4346109131971995, | |
| "rewards/get_embedding_sim/mean": 0.549517830212911, | |
| "rewards/get_embedding_sim/std": 0.11452717334032059, | |
| "rewards/reward_num_unique_chars/mean": 0.3220486094554265, | |
| "rewards/reward_num_unique_chars/std": 0.4634987811247508, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.033854166666666685, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 918.0, | |
| "completions/mean_length": 155.64192962646484, | |
| "completions/mean_terminated_length": 125.11021041870117, | |
| "completions/min_length": 6.0, | |
| "completions/min_terminated_length": 6.0, | |
| "epoch": 2.6805293005671076, | |
| "grad_norm": 0.09938167780637741, | |
| "kl": 0.2503814697265625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.054, | |
| "num_tokens": 47795091.0, | |
| "reward": 0.7676738500595093, | |
| "reward_std": 0.39470958709716797, | |
| "rewards/get_embedding_sim/mean": 0.5736633539199829, | |
| "rewards/get_embedding_sim/std": 0.09976038336753845, | |
| "rewards/reward_num_unique_chars/mean": 0.1940104141831398, | |
| "rewards/reward_num_unique_chars/std": 0.3958670049905777, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.017361111111111122, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 878.6666666666666, | |
| "completions/mean_length": 116.89757283528645, | |
| "completions/mean_terminated_length": 100.84752400716145, | |
| "completions/min_length": 6.333333333333333, | |
| "completions/min_terminated_length": 6.333333333333333, | |
| "epoch": 2.7258979206049148, | |
| "grad_norm": 0.1425255984067917, | |
| "kl": 0.2775370279947917, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0481, | |
| "num_tokens": 48511037.0, | |
| "reward": 0.863362193107605, | |
| "reward_std": 0.4587005575497945, | |
| "rewards/get_embedding_sim/mean": 0.5473899245262146, | |
| "rewards/get_embedding_sim/std": 0.09922760476668675, | |
| "rewards/reward_num_unique_chars/mean": 0.3159722288449605, | |
| "rewards/reward_num_unique_chars/std": 0.4650394419829051, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.014756944444444456, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 891.3333333333334, | |
| "completions/mean_length": 102.27864837646484, | |
| "completions/mean_terminated_length": 88.46848042805989, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 2.7712665406427224, | |
| "grad_norm": 0.09843874722719193, | |
| "kl": 0.34393310546875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0436, | |
| "num_tokens": 49204478.0, | |
| "reward": 0.8661341269810995, | |
| "reward_std": 0.46143727501233417, | |
| "rewards/get_embedding_sim/mean": 0.5727312763532003, | |
| "rewards/get_embedding_sim/std": 0.11564485480388005, | |
| "rewards/reward_num_unique_chars/mean": 0.2934027711550395, | |
| "rewards/reward_num_unique_chars/std": 0.4531017243862152, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01996527777777779, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 754.6666666666666, | |
| "completions/mean_length": 122.11806233723958, | |
| "completions/mean_terminated_length": 103.76323954264323, | |
| "completions/min_length": 5.666666666666667, | |
| "completions/min_terminated_length": 5.666666666666667, | |
| "epoch": 2.816635160680529, | |
| "grad_norm": 0.15066391229629517, | |
| "kl": 0.3179728190104167, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0582, | |
| "num_tokens": 49927110.0, | |
| "reward": 0.892190178235372, | |
| "reward_std": 0.4511215090751648, | |
| "rewards/get_embedding_sim/mean": 0.5527804295221964, | |
| "rewards/get_embedding_sim/std": 0.1083058441678683, | |
| "rewards/reward_num_unique_chars/mean": 0.3394097238779068, | |
| "rewards/reward_num_unique_chars/std": 0.46636247634887695, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.022569444444444458, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 940.3333333333334, | |
| "completions/mean_length": 126.84115091959636, | |
| "completions/mean_terminated_length": 106.15006764729817, | |
| "completions/min_length": 7.333333333333333, | |
| "completions/min_terminated_length": 7.333333333333333, | |
| "epoch": 2.8620037807183367, | |
| "grad_norm": 2.990046739578247, | |
| "kl": 0.47100830078125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0551, | |
| "num_tokens": 50663055.0, | |
| "reward": 0.8623983860015869, | |
| "reward_std": 0.4631191889444987, | |
| "rewards/get_embedding_sim/mean": 0.5533705353736877, | |
| "rewards/get_embedding_sim/std": 0.11140244205792744, | |
| "rewards/reward_num_unique_chars/mean": 0.3090277810891469, | |
| "rewards/reward_num_unique_chars/std": 0.4615551829338074, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01649305555555558, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 891.0, | |
| "completions/mean_length": 111.4730936686198, | |
| "completions/mean_terminated_length": 96.25564575195312, | |
| "completions/min_length": 5.333333333333333, | |
| "completions/min_terminated_length": 5.333333333333333, | |
| "epoch": 2.9073724007561434, | |
| "grad_norm": 0.11676046997308731, | |
| "kl": 0.4471232096354167, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0481, | |
| "num_tokens": 51373952.0, | |
| "reward": 0.9203431606292725, | |
| "reward_std": 0.47053369879722595, | |
| "rewards/get_embedding_sim/mean": 0.5444750587145487, | |
| "rewards/get_embedding_sim/std": 0.1073705404996872, | |
| "rewards/reward_num_unique_chars/mean": 0.3758680522441864, | |
| "rewards/reward_num_unique_chars/std": 0.4811862111091614, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.016493055555555542, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 776.3333333333334, | |
| "completions/mean_length": 113.12673950195312, | |
| "completions/mean_terminated_length": 97.83474731445312, | |
| "completions/min_length": 8.666666666666666, | |
| "completions/min_terminated_length": 8.666666666666666, | |
| "epoch": 2.952741020793951, | |
| "grad_norm": 0.09854816645383835, | |
| "kl": 0.285675048828125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0465, | |
| "num_tokens": 52094098.0, | |
| "reward": 0.888769249121348, | |
| "reward_std": 0.46734312176704407, | |
| "rewards/get_embedding_sim/mean": 0.5519636472066244, | |
| "rewards/get_embedding_sim/std": 0.11934416989485423, | |
| "rewards/reward_num_unique_chars/mean": 0.3368055522441864, | |
| "rewards/reward_num_unique_chars/std": 0.4713793396949768, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.013888888888888876, | |
| "completions/max_length": 1024.0, | |
| "completions/max_terminated_length": 927.3333333333334, | |
| "completions/mean_length": 112.50087229410808, | |
| "completions/mean_terminated_length": 99.5953369140625, | |
| "completions/min_length": 7.0, | |
| "completions/min_terminated_length": 7.0, | |
| "epoch": 2.998109640831758, | |
| "grad_norm": 0.08575434237718582, | |
| "kl": 0.42242431640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0494, | |
| "num_tokens": 52800707.0, | |
| "reward": 0.9310129086176554, | |
| "reward_std": 0.4663335382938385, | |
| "rewards/get_embedding_sim/mean": 0.5603530804316202, | |
| "rewards/get_embedding_sim/std": 0.10822075108687083, | |
| "rewards/reward_num_unique_chars/mean": 0.3706597288449605, | |
| "rewards/reward_num_unique_chars/std": 0.4820249378681183, | |
| "step": 198 | |
| } | |
| ], | |
| "logging_steps": 3, | |
| "max_steps": 198, | |
| "num_input_tokens_seen": 52800707, | |
| "num_train_epochs": 3, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |