kiminaembedbetatriple / trainer_state.json
sorgfresser's picture
Upload folder using huggingface_hub
633c3f6 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.998109640831758,
"eval_steps": 51,
"global_step": 198,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1519097222222222,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1009.0,
"completions/mean_length": 318.44444783528644,
"completions/mean_terminated_length": 192.69292195638022,
"completions/min_length": 20.666666666666668,
"completions/min_terminated_length": 20.666666666666668,
"epoch": 0.045368620037807186,
"grad_norm": 0.1771457940340042,
"kl": 5.446871121724447e-05,
"learning_rate": 4e-07,
"loss": -0.0064,
"num_tokens": 949568.0,
"reward": 0.4211084047953288,
"reward_std": 0.3868949313958486,
"rewards/get_embedding_sim/mean": 0.3429833749930064,
"rewards/get_embedding_sim/std": 0.06474291781584422,
"rewards/reward_num_unique_chars/mean": 0.026324149842063587,
"rewards/reward_num_unique_chars/std": 0.1598906268676122,
"step": 3
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1171875,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 992.3333333333334,
"completions/mean_length": 298.11285400390625,
"completions/mean_terminated_length": 200.7570597330729,
"completions/min_length": 9.666666666666666,
"completions/min_terminated_length": 9.666666666666666,
"epoch": 0.09073724007561437,
"grad_norm": 0.12331758439540863,
"kl": 0.00014600654443105063,
"learning_rate": 1e-06,
"loss": 0.0254,
"num_tokens": 1880082.0,
"reward": 0.6416476865609487,
"reward_std": 0.5329093436400095,
"rewards/get_embedding_sim/mean": 0.36821014682451886,
"rewards/get_embedding_sim/std": 0.07475950072209041,
"rewards/reward_num_unique_chars/mean": 0.09220736970504124,
"rewards/reward_num_unique_chars/std": 0.2722427050272624,
"step": 6
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08680555555555558,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 970.0,
"completions/mean_length": 252.9539998372396,
"completions/mean_terminated_length": 180.34420776367188,
"completions/min_length": 24.666666666666668,
"completions/min_terminated_length": 24.666666666666668,
"epoch": 0.13610586011342155,
"grad_norm": 0.08608454465866089,
"kl": 0.0001450727383295695,
"learning_rate": 1e-06,
"loss": 0.024,
"num_tokens": 2753485.0,
"reward": 0.5257614056269327,
"reward_std": 0.4465513428052266,
"rewards/get_embedding_sim/mean": 0.33044888575871784,
"rewards/get_embedding_sim/std": 0.07643905778725942,
"rewards/reward_num_unique_chars/mean": 0.06572048738598824,
"rewards/reward_num_unique_chars/std": 0.24313671390215555,
"step": 9
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.10763888888888888,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 980.6666666666666,
"completions/mean_length": 271.1293538411458,
"completions/mean_terminated_length": 180.34170532226562,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"epoch": 0.18147448015122875,
"grad_norm": 0.11351985484361649,
"kl": 0.000451435645421346,
"learning_rate": 1e-06,
"loss": 0.0161,
"num_tokens": 3655650.0,
"reward": 0.5510341823101044,
"reward_std": 0.5266622304916382,
"rewards/get_embedding_sim/mean": 0.332284152507782,
"rewards/get_embedding_sim/std": 0.07756081471840541,
"rewards/reward_num_unique_chars/mean": 0.07334695508082707,
"rewards/reward_num_unique_chars/std": 0.2513364603122075,
"step": 12
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1484375,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 975.0,
"completions/mean_length": 322.98785400390625,
"completions/mean_terminated_length": 200.65520731608072,
"completions/min_length": 12.0,
"completions/min_terminated_length": 12.0,
"epoch": 0.22684310018903592,
"grad_norm": 0.1555059850215912,
"kl": 0.0004805326461791992,
"learning_rate": 1e-06,
"loss": 0.0487,
"num_tokens": 4589956.0,
"reward": 0.5868227481842041,
"reward_std": 0.5241502523422241,
"rewards/get_embedding_sim/mean": 0.3524477581183116,
"rewards/get_embedding_sim/std": 0.07752909014622371,
"rewards/reward_num_unique_chars/mean": 0.0786214725424846,
"rewards/reward_num_unique_chars/std": 0.2524682929118474,
"step": 15
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09982638888888888,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 973.0,
"completions/mean_length": 263.34288533528644,
"completions/mean_terminated_length": 178.2960662841797,
"completions/min_length": 17.666666666666668,
"completions/min_terminated_length": 17.666666666666668,
"epoch": 0.2722117202268431,
"grad_norm": 0.15412873029708862,
"kl": 0.0006418625513712565,
"learning_rate": 1e-06,
"loss": 0.0529,
"num_tokens": 5476095.0,
"reward": 0.7471893429756165,
"reward_std": 0.6821479399998983,
"rewards/get_embedding_sim/mean": 0.3487518032391866,
"rewards/get_embedding_sim/std": 0.07891600827376048,
"rewards/reward_num_unique_chars/mean": 0.13329477856556574,
"rewards/reward_num_unique_chars/std": 0.3387155433495839,
"step": 18
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08333333333333333,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 989.6666666666666,
"completions/mean_length": 239.4496612548828,
"completions/mean_terminated_length": 168.3182576497396,
"completions/min_length": 14.666666666666666,
"completions/min_terminated_length": 14.666666666666666,
"epoch": 0.31758034026465026,
"grad_norm": 0.19775940477848053,
"kl": 0.001989444096883138,
"learning_rate": 1e-06,
"loss": 0.0232,
"num_tokens": 6334901.0,
"reward": 0.6757530768712362,
"reward_std": 0.5965128739674886,
"rewards/get_embedding_sim/mean": 0.34762802720069885,
"rewards/get_embedding_sim/std": 0.061141988883415856,
"rewards/reward_num_unique_chars/mean": 0.11084798475106557,
"rewards/reward_num_unique_chars/std": 0.31187912821769714,
"step": 21
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09722222222222221,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 973.0,
"completions/mean_length": 223.53906758626303,
"completions/mean_terminated_length": 137.0214869181315,
"completions/min_length": 16.0,
"completions/min_terminated_length": 16.0,
"epoch": 0.3629489603024575,
"grad_norm": 0.159920796751976,
"kl": 0.003296534220377604,
"learning_rate": 1e-06,
"loss": 0.0371,
"num_tokens": 7177202.0,
"reward": 0.7903947830200195,
"reward_std": 0.7087553143501282,
"rewards/get_embedding_sim/mean": 0.37893640001614887,
"rewards/get_embedding_sim/std": 0.07807190467913945,
"rewards/reward_num_unique_chars/mean": 0.13736129055420557,
"rewards/reward_num_unique_chars/std": 0.3415720462799072,
"step": 24
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04340277777777779,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 871.0,
"completions/mean_length": 189.19965616861978,
"completions/mean_terminated_length": 151.40495808919272,
"completions/min_length": 14.333333333333334,
"completions/min_terminated_length": 14.333333333333334,
"epoch": 0.40831758034026466,
"grad_norm": 0.07982576638460159,
"kl": 0.006541093190511067,
"learning_rate": 1e-06,
"loss": 0.0323,
"num_tokens": 7978744.0,
"reward": 0.9137211839358012,
"reward_std": 0.718700091044108,
"rewards/get_embedding_sim/mean": 0.36163782080014545,
"rewards/get_embedding_sim/std": 0.0756089190642039,
"rewards/reward_num_unique_chars/mean": 0.1940170923868815,
"rewards/reward_num_unique_chars/std": 0.3915421764055888,
"step": 27
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04253472222222221,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 950.3333333333334,
"completions/mean_length": 195.49305725097656,
"completions/mean_terminated_length": 158.83220418294272,
"completions/min_length": 20.666666666666668,
"completions/min_terminated_length": 20.666666666666668,
"epoch": 0.45368620037807184,
"grad_norm": 0.13737693428993225,
"kl": 0.008511225382486979,
"learning_rate": 1e-06,
"loss": 0.0158,
"num_tokens": 8781392.0,
"reward": 0.6430213848749796,
"reward_std": 0.5739699502786001,
"rewards/get_embedding_sim/mean": 0.3461463352044423,
"rewards/get_embedding_sim/std": 0.07520903646945953,
"rewards/reward_num_unique_chars/mean": 0.09946840691069762,
"rewards/reward_num_unique_chars/std": 0.26744696994622547,
"step": 30
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.045138888888888874,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 882.3333333333334,
"completions/mean_length": 182.24132283528647,
"completions/mean_terminated_length": 142.2494913736979,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"epoch": 0.499054820415879,
"grad_norm": 0.09873297065496445,
"kl": 0.012536366780598959,
"learning_rate": 1e-06,
"loss": 0.0128,
"num_tokens": 9570790.0,
"reward": 0.6741114258766174,
"reward_std": 0.6212253371874491,
"rewards/get_embedding_sim/mean": 0.35900717973709106,
"rewards/get_embedding_sim/std": 0.0736292873819669,
"rewards/reward_num_unique_chars/mean": 0.10573149348298709,
"rewards/reward_num_unique_chars/std": 0.3019101023674011,
"step": 33
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02864583333333337,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 906.0,
"completions/mean_length": 177.17447916666666,
"completions/mean_terminated_length": 152.01558430989584,
"completions/min_length": 13.333333333333334,
"completions/min_terminated_length": 13.333333333333334,
"epoch": 0.5444234404536862,
"grad_norm": 0.11605791002511978,
"kl": 0.025735855102539062,
"learning_rate": 1e-06,
"loss": 0.0051,
"num_tokens": 10355503.0,
"reward": 0.5276843309402466,
"reward_std": 0.502232551574707,
"rewards/get_embedding_sim/mean": 0.3662259578704834,
"rewards/get_embedding_sim/std": 0.08873194952805837,
"rewards/reward_num_unique_chars/mean": 0.054056490461031594,
"rewards/reward_num_unique_chars/std": 0.22509411970774332,
"step": 36
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.033854166666666664,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 854.3333333333334,
"completions/mean_length": 166.70486450195312,
"completions/mean_terminated_length": 136.90866088867188,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"epoch": 0.5897920604914934,
"grad_norm": 4.433223724365234,
"kl": 0.2714697519938151,
"learning_rate": 1e-06,
"loss": 0.0254,
"num_tokens": 11137371.0,
"reward": 1.016512393951416,
"reward_std": 0.7703921596209208,
"rewards/get_embedding_sim/mean": 0.3654707372188568,
"rewards/get_embedding_sim/std": 0.09141946583986282,
"rewards/reward_num_unique_chars/mean": 0.21773314972718558,
"rewards/reward_num_unique_chars/std": 0.3968968590100606,
"step": 39
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028645833333333332,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 923.0,
"completions/mean_length": 189.52778116861978,
"completions/mean_terminated_length": 164.9041544596354,
"completions/min_length": 23.333333333333332,
"completions/min_terminated_length": 23.333333333333332,
"epoch": 0.6351606805293005,
"grad_norm": 0.09748831391334534,
"kl": 0.030905405680338543,
"learning_rate": 1e-06,
"loss": 0.0173,
"num_tokens": 11945531.0,
"reward": 0.7334451675415039,
"reward_std": 0.6726242105166117,
"rewards/get_embedding_sim/mean": 0.3584451178709666,
"rewards/get_embedding_sim/std": 0.09209247678518295,
"rewards/reward_num_unique_chars/mean": 0.12500000248352686,
"rewards/reward_num_unique_chars/std": 0.32361265023549396,
"step": 42
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02690972222222221,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 825.6666666666666,
"completions/mean_length": 168.27778116861978,
"completions/mean_terminated_length": 144.8372548421224,
"completions/min_length": 15.0,
"completions/min_terminated_length": 15.0,
"epoch": 0.6805293005671077,
"grad_norm": 0.0829065814614296,
"kl": 0.028959910074869793,
"learning_rate": 1e-06,
"loss": 0.0053,
"num_tokens": 12715243.0,
"reward": 0.77097487449646,
"reward_std": 0.5438057780265808,
"rewards/get_embedding_sim/mean": 0.3829539120197296,
"rewards/get_embedding_sim/std": 0.09809910257657369,
"rewards/reward_num_unique_chars/mean": 0.13059413681427637,
"rewards/reward_num_unique_chars/std": 0.31896015008290607,
"step": 45
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.022569444444444458,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 828.0,
"completions/mean_length": 157.07465616861978,
"completions/mean_terminated_length": 136.9731216430664,
"completions/min_length": 10.666666666666666,
"completions/min_terminated_length": 10.666666666666666,
"epoch": 0.725897920604915,
"grad_norm": 0.09397952258586884,
"kl": 0.044497172037760414,
"learning_rate": 1e-06,
"loss": -0.0032,
"num_tokens": 13477473.0,
"reward": 0.8347963392734528,
"reward_std": 0.6062483191490173,
"rewards/get_embedding_sim/mean": 0.3972962299982707,
"rewards/get_embedding_sim/std": 0.10549474010864894,
"rewards/reward_num_unique_chars/mean": 0.14635550851623216,
"rewards/reward_num_unique_chars/std": 0.3286245862642924,
"step": 48
},
{
"epoch": 0.7712665406427222,
"grad_norm": 1.0158724784851074,
"learning_rate": 1e-06,
"loss": 0.0131,
"step": 51
},
{
"epoch": 0.7712665406427222,
"eval_clip_ratio/high_max": 0.0,
"eval_clip_ratio/high_mean": 0.0,
"eval_clip_ratio/low_mean": 0.0,
"eval_clip_ratio/low_min": 0.0,
"eval_clip_ratio/region_mean": 0.0,
"eval_completions/clipped_ratio": 0.1023065476190476,
"eval_completions/max_length": 850.0357142857143,
"eval_completions/max_terminated_length": 649.3214285714286,
"eval_completions/mean_length": 221.02716418675013,
"eval_completions/mean_terminated_length": 136.32026665551322,
"eval_completions/min_length": 25.892857142857142,
"eval_completions/min_terminated_length": 25.892857142857142,
"eval_kl": 0.05330167497907366,
"eval_loss": 0.022182755172252655,
"eval_num_tokens": 14225380.0,
"eval_reward": 0.6993682932640825,
"eval_reward_std": 0.6098802514108164,
"eval_rewards/get_embedding_sim/mean": 0.408073626724737,
"eval_rewards/get_embedding_sim/std": 0.08180103763671857,
"eval_rewards/reward_num_unique_chars/mean": 0.09725111100955733,
"eval_rewards/reward_num_unique_chars/std": 0.1932054047605821,
"eval_runtime": 6593.9311,
"eval_samples_per_second": 0.008,
"eval_steps_per_second": 0.0,
"step": 51
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.031249999999999983,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 923.5,
"completions/mean_length": 173.74523162841797,
"completions/mean_terminated_length": 146.51427459716797,
"completions/min_length": 12.0,
"completions/min_terminated_length": 12.0,
"epoch": 0.8166351606805293,
"grad_norm": 0.13796095550060272,
"kl": 0.061681111653645836,
"learning_rate": 1e-06,
"loss": 0.0132,
"num_tokens": 15035350.0,
"reward": 0.7754727999369303,
"reward_std": 0.6537116318941116,
"rewards/get_embedding_sim/mean": 0.3978685835997264,
"rewards/get_embedding_sim/std": 0.10740451887249947,
"rewards/reward_num_unique_chars/mean": 0.1262344146768252,
"rewards/reward_num_unique_chars/std": 0.31391797463099164,
"step": 54
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.032986111111111126,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 895.6666666666666,
"completions/mean_length": 179.25955708821616,
"completions/mean_terminated_length": 150.37726338704428,
"completions/min_length": 9.666666666666666,
"completions/min_terminated_length": 9.666666666666666,
"epoch": 0.8620037807183365,
"grad_norm": 9.711634635925293,
"kl": 0.30323028564453125,
"learning_rate": 1e-06,
"loss": 0.0171,
"num_tokens": 15831681.0,
"reward": 0.8330511649449667,
"reward_std": 0.7141762177149454,
"rewards/get_embedding_sim/mean": 0.392946978410085,
"rewards/get_embedding_sim/std": 0.10180553545554479,
"rewards/reward_num_unique_chars/mean": 0.14735475679238638,
"rewards/reward_num_unique_chars/std": 0.34930500388145447,
"step": 57
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03472222222222221,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 920.3333333333334,
"completions/mean_length": 175.86719258626303,
"completions/mean_terminated_length": 145.65855916341147,
"completions/min_length": 15.0,
"completions/min_terminated_length": 15.0,
"epoch": 0.9073724007561437,
"grad_norm": 0.11200369894504547,
"kl": 0.056910196940104164,
"learning_rate": 1e-06,
"loss": 0.0094,
"num_tokens": 16616760.0,
"reward": 0.9081356525421143,
"reward_std": 0.7194747726122538,
"rewards/get_embedding_sim/mean": 0.40292728940645856,
"rewards/get_embedding_sim/std": 0.11402523269255956,
"rewards/reward_num_unique_chars/mean": 0.17317021762331328,
"rewards/reward_num_unique_chars/std": 0.34040839473406476,
"step": 60
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013888888888888914,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 768.3333333333334,
"completions/mean_length": 162.06250508626303,
"completions/mean_terminated_length": 149.80463155110678,
"completions/min_length": 12.0,
"completions/min_terminated_length": 12.0,
"epoch": 0.9527410207939508,
"grad_norm": 0.08261118829250336,
"kl": 0.08898417154947917,
"learning_rate": 1e-06,
"loss": 0.012,
"num_tokens": 17393280.0,
"reward": 0.8178274830182394,
"reward_std": 0.7919754783312479,
"rewards/get_embedding_sim/mean": 0.4141816198825836,
"rewards/get_embedding_sim/std": 0.11467475444078445,
"rewards/reward_num_unique_chars/mean": 0.13454861069718996,
"rewards/reward_num_unique_chars/std": 0.33077992002169293,
"step": 63
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0385656130268199,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 884.3333333333334,
"completions/mean_length": 183.8406778971354,
"completions/mean_terminated_length": 149.68450419108072,
"completions/min_length": 9.333333333333334,
"completions/min_terminated_length": 9.333333333333334,
"epoch": 0.998109640831758,
"grad_norm": 0.07706479728221893,
"kl": 0.040013631184895836,
"learning_rate": 1e-06,
"loss": 0.0204,
"num_tokens": 18173273.0,
"reward": 0.9729219675064087,
"reward_std": 0.8207030693689982,
"rewards/get_embedding_sim/mean": 0.4208385944366455,
"rewards/get_embedding_sim/std": 0.11511148760716121,
"rewards/reward_num_unique_chars/mean": 0.18582184116045633,
"rewards/reward_num_unique_chars/std": 0.3880065679550171,
"step": 66
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.036458333333333336,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 975.0,
"completions/mean_length": 182.76996866861978,
"completions/mean_terminated_length": 150.8812713623047,
"completions/min_length": 8.0,
"completions/min_terminated_length": 8.0,
"epoch": 1.0453686200378072,
"grad_norm": 0.12481274455785751,
"kl": 0.06750742594401042,
"learning_rate": 1e-06,
"loss": 0.0078,
"num_tokens": 18973648.0,
"reward": 0.7922607262929281,
"reward_std": 0.778564453125,
"rewards/get_embedding_sim/mean": 0.43028150995572406,
"rewards/get_embedding_sim/std": 0.11360271523396175,
"rewards/reward_num_unique_chars/mean": 0.12104393541812897,
"rewards/reward_num_unique_chars/std": 0.3201603094736735,
"step": 69
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.027777777777777752,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 924.0,
"completions/mean_length": 164.88194529215494,
"completions/mean_terminated_length": 140.49947357177734,
"completions/min_length": 10.333333333333334,
"completions/min_terminated_length": 10.333333333333334,
"epoch": 1.0907372400756143,
"grad_norm": 0.27184560894966125,
"kl": 0.10882568359375,
"learning_rate": 1e-06,
"loss": 0.0144,
"num_tokens": 19738968.0,
"reward": 0.9275963107744852,
"reward_std": 0.841428816318512,
"rewards/get_embedding_sim/mean": 0.4406171242396037,
"rewards/get_embedding_sim/std": 0.11815810203552246,
"rewards/reward_num_unique_chars/mean": 0.162567267815272,
"rewards/reward_num_unique_chars/std": 0.36787914236386615,
"step": 72
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.035590277777777755,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 858.0,
"completions/mean_length": 176.44878641764322,
"completions/mean_terminated_length": 145.19543965657553,
"completions/min_length": 8.333333333333334,
"completions/min_terminated_length": 8.333333333333334,
"epoch": 1.1361058601134215,
"grad_norm": 0.11772840470075607,
"kl": 0.132965087890625,
"learning_rate": 1e-06,
"loss": 0.0071,
"num_tokens": 20526877.0,
"reward": 0.7764408787091573,
"reward_std": 0.7020115653673807,
"rewards/get_embedding_sim/mean": 0.4378991524378459,
"rewards/get_embedding_sim/std": 0.11236891647179921,
"rewards/reward_num_unique_chars/mean": 0.11354367559154828,
"rewards/reward_num_unique_chars/std": 0.31067532300949097,
"step": 75
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.030381944444444458,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 938.6666666666666,
"completions/mean_length": 181.06771341959634,
"completions/mean_terminated_length": 154.67694600423178,
"completions/min_length": 10.333333333333334,
"completions/min_terminated_length": 10.333333333333334,
"epoch": 1.1814744801512287,
"grad_norm": 0.21804682910442352,
"kl": 0.09952545166015625,
"learning_rate": 1e-06,
"loss": 0.0079,
"num_tokens": 21325291.0,
"reward": 0.8087505102157593,
"reward_std": 0.7311090230941772,
"rewards/get_embedding_sim/mean": 0.4597921272118886,
"rewards/get_embedding_sim/std": 0.12011716266473134,
"rewards/reward_num_unique_chars/mean": 0.11631944527228673,
"rewards/reward_num_unique_chars/std": 0.3020235498746236,
"step": 78
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028645833333333332,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1007.3333333333334,
"completions/mean_length": 206.53125508626303,
"completions/mean_terminated_length": 182.36050415039062,
"completions/min_length": 9.666666666666666,
"completions/min_terminated_length": 9.666666666666666,
"epoch": 1.2268431001890359,
"grad_norm": 0.1415005475282669,
"kl": 0.170013427734375,
"learning_rate": 1e-06,
"loss": 0.0191,
"num_tokens": 22125439.0,
"reward": 0.8599557876586914,
"reward_std": 0.6892009973526001,
"rewards/get_embedding_sim/mean": 0.45630990465482074,
"rewards/get_embedding_sim/std": 0.11122701565424602,
"rewards/reward_num_unique_chars/mean": 0.1345486119389534,
"rewards/reward_num_unique_chars/std": 0.3128484884897868,
"step": 81
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.039930555555555546,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 938.6666666666666,
"completions/mean_length": 214.3307342529297,
"completions/mean_terminated_length": 180.5359090169271,
"completions/min_length": 12.666666666666666,
"completions/min_terminated_length": 12.666666666666666,
"epoch": 1.272211720226843,
"grad_norm": 0.11570374667644501,
"kl": 0.06879933675130208,
"learning_rate": 1e-06,
"loss": 0.0228,
"num_tokens": 22955116.0,
"reward": 0.9702663818995158,
"reward_std": 0.7755107680956522,
"rewards/get_embedding_sim/mean": 0.47287049889564514,
"rewards/get_embedding_sim/std": 0.11713164548079173,
"rewards/reward_num_unique_chars/mean": 0.16612045466899872,
"rewards/reward_num_unique_chars/std": 0.3707600136597951,
"step": 84
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.032986111111111126,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 982.0,
"completions/mean_length": 191.13976033528647,
"completions/mean_terminated_length": 162.73322041829428,
"completions/min_length": 7.666666666666667,
"completions/min_terminated_length": 7.666666666666667,
"epoch": 1.3175803402646502,
"grad_norm": 0.19582344591617584,
"kl": 0.08817799886067708,
"learning_rate": 1e-06,
"loss": 0.0186,
"num_tokens": 23758269.0,
"reward": 0.8809124827384949,
"reward_std": 0.7492716908454895,
"rewards/get_embedding_sim/mean": 0.4720582564671834,
"rewards/get_embedding_sim/std": 0.11799828956524532,
"rewards/reward_num_unique_chars/mean": 0.13729924211899439,
"rewards/reward_num_unique_chars/std": 0.3396035333474477,
"step": 87
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.032118055555555546,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 898.6666666666666,
"completions/mean_length": 155.8697967529297,
"completions/mean_terminated_length": 127.03400421142578,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"epoch": 1.3629489603024574,
"grad_norm": 0.10882719606161118,
"kl": 0.08435567220052083,
"learning_rate": 1e-06,
"loss": 0.0091,
"num_tokens": 24522615.0,
"reward": 0.9096565643946329,
"reward_std": 0.7040959596633911,
"rewards/get_embedding_sim/mean": 0.4825731615225474,
"rewards/get_embedding_sim/std": 0.11133117477099101,
"rewards/reward_num_unique_chars/mean": 0.1426701620221138,
"rewards/reward_num_unique_chars/std": 0.3476703961690267,
"step": 90
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028645833333333332,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 879.3333333333334,
"completions/mean_length": 171.0026092529297,
"completions/mean_terminated_length": 145.80119832356772,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"epoch": 1.4083175803402646,
"grad_norm": 0.16034463047981262,
"kl": 0.0664825439453125,
"learning_rate": 1e-06,
"loss": 0.0173,
"num_tokens": 25303194.0,
"reward": 1.1405272086461384,
"reward_std": 0.8232053716977438,
"rewards/get_embedding_sim/mean": 0.4634438355763753,
"rewards/get_embedding_sim/std": 0.11458807935317357,
"rewards/reward_num_unique_chars/mean": 0.22588256498177847,
"rewards/reward_num_unique_chars/std": 0.4179500639438629,
"step": 93
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.038194444444444454,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 954.3333333333334,
"completions/mean_length": 197.03039042154947,
"completions/mean_terminated_length": 164.30577087402344,
"completions/min_length": 8.0,
"completions/min_terminated_length": 8.0,
"epoch": 1.4536862003780717,
"grad_norm": 1.1371827125549316,
"kl": 0.16266377766927084,
"learning_rate": 1e-06,
"loss": 0.018,
"num_tokens": 26107613.0,
"reward": 0.8050010005633036,
"reward_std": 0.7417031327883402,
"rewards/get_embedding_sim/mean": 0.4768759409586589,
"rewards/get_embedding_sim/std": 0.1163704867164294,
"rewards/reward_num_unique_chars/mean": 0.10968360553185146,
"rewards/reward_num_unique_chars/std": 0.29880866408348083,
"step": 96
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.037326388888888916,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1002.6666666666666,
"completions/mean_length": 184.1883748372396,
"completions/mean_terminated_length": 151.9073689778646,
"completions/min_length": 7.666666666666667,
"completions/min_terminated_length": 7.666666666666667,
"epoch": 1.499054820415879,
"grad_norm": 0.08430308103561401,
"kl": 0.07194010416666667,
"learning_rate": 1e-06,
"loss": 0.0169,
"num_tokens": 26899254.0,
"reward": 0.8056914011637369,
"reward_std": 0.7580650448799133,
"rewards/get_embedding_sim/mean": 0.48798303802808124,
"rewards/get_embedding_sim/std": 0.11710481345653534,
"rewards/reward_num_unique_chars/mean": 0.106216366092364,
"rewards/reward_num_unique_chars/std": 0.3037123878796895,
"step": 99
},
{
"epoch": 1.544423440453686,
"grad_norm": 0.11377694457769394,
"learning_rate": 1e-06,
"loss": 0.0254,
"step": 102
},
{
"epoch": 1.544423440453686,
"eval_clip_ratio/high_max": 0.0,
"eval_clip_ratio/high_mean": 0.0,
"eval_clip_ratio/low_mean": 0.0,
"eval_clip_ratio/low_min": 0.0,
"eval_clip_ratio/region_mean": 0.0,
"eval_completions/clipped_ratio": 0.10007440476190474,
"eval_completions/max_length": 869.5892857142857,
"eval_completions/max_terminated_length": 639.0535714285714,
"eval_completions/mean_length": 225.3545457976205,
"eval_completions/mean_terminated_length": 140.344126360757,
"eval_completions/min_length": 19.357142857142858,
"eval_completions/min_terminated_length": 19.357142857142858,
"eval_kl": 0.07553209577287946,
"eval_loss": 0.037391725927591324,
"eval_num_tokens": 27703933.0,
"eval_reward": 0.7799429536930153,
"eval_reward_std": 0.6953434666751751,
"eval_rewards/get_embedding_sim/mean": 0.47748757898807526,
"eval_rewards/get_embedding_sim/std": 0.0975222562971924,
"eval_rewards/reward_num_unique_chars/mean": 0.10085803000921649,
"eval_rewards/reward_num_unique_chars/std": 0.22144863993993827,
"eval_runtime": 5743.3373,
"eval_samples_per_second": 0.01,
"eval_steps_per_second": 0.0,
"step": 102
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 905.3333333333334,
"completions/mean_length": 192.20443216959634,
"completions/mean_terminated_length": 158.34148915608725,
"completions/min_length": 6.0,
"completions/min_terminated_length": 6.0,
"epoch": 1.5897920604914932,
"grad_norm": 0.14566491544246674,
"kl": 0.0706634521484375,
"learning_rate": 1e-06,
"loss": 0.0208,
"num_tokens": 28512525.0,
"reward": 0.8942790528138479,
"reward_std": 0.7560157477855682,
"rewards/get_embedding_sim/mean": 0.47891440490881604,
"rewards/get_embedding_sim/std": 0.11716391022006671,
"rewards/reward_num_unique_chars/mean": 0.1388231466213862,
"rewards/reward_num_unique_chars/std": 0.3291383981704712,
"step": 105
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03472222222222221,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 964.0,
"completions/mean_length": 205.2352498372396,
"completions/mean_terminated_length": 176.03035990397134,
"completions/min_length": 5.333333333333333,
"completions/min_terminated_length": 5.333333333333333,
"epoch": 1.6351606805293004,
"grad_norm": 0.08621126413345337,
"kl": 0.08469390869140625,
"learning_rate": 1e-06,
"loss": 0.027,
"num_tokens": 29338780.0,
"reward": 0.8891541957855225,
"reward_std": 0.8206586241722107,
"rewards/get_embedding_sim/mean": 0.4672791560490926,
"rewards/get_embedding_sim/std": 0.11891171584526698,
"rewards/reward_num_unique_chars/mean": 0.14074058582385382,
"rewards/reward_num_unique_chars/std": 0.34693758686383563,
"step": 108
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.032118055555555546,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 932.0,
"completions/mean_length": 195.6493123372396,
"completions/mean_terminated_length": 168.39810689290366,
"completions/min_length": 8.333333333333334,
"completions/min_terminated_length": 8.333333333333334,
"epoch": 1.6805293005671076,
"grad_norm": 0.09061074256896973,
"kl": 0.061197916666666664,
"learning_rate": 1e-06,
"loss": 0.0088,
"num_tokens": 30140024.0,
"reward": 0.8708882729212443,
"reward_std": 0.6558753848075867,
"rewards/get_embedding_sim/mean": 0.49067989985148114,
"rewards/get_embedding_sim/std": 0.11308762182792027,
"rewards/reward_num_unique_chars/mean": 0.12729256972670555,
"rewards/reward_num_unique_chars/std": 0.3063565840323766,
"step": 111
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028645833333333332,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 971.3333333333334,
"completions/mean_length": 168.0746612548828,
"completions/mean_terminated_length": 142.76580810546875,
"completions/min_length": 9.666666666666666,
"completions/min_terminated_length": 9.666666666666666,
"epoch": 1.725897920604915,
"grad_norm": 0.07783554494380951,
"kl": 0.0714569091796875,
"learning_rate": 1e-06,
"loss": 0.0222,
"num_tokens": 30914926.0,
"reward": 0.9490655660629272,
"reward_std": 0.8103155891100565,
"rewards/get_embedding_sim/mean": 0.4881279369195302,
"rewards/get_embedding_sim/std": 0.1105448305606842,
"rewards/reward_num_unique_chars/mean": 0.1541931927204132,
"rewards/reward_num_unique_chars/std": 0.3540232678254445,
"step": 114
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.036458333333333336,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 918.3333333333334,
"completions/mean_length": 165.41146341959634,
"completions/mean_terminated_length": 132.9504165649414,
"completions/min_length": 7.666666666666667,
"completions/min_terminated_length": 7.666666666666667,
"epoch": 1.7712665406427222,
"grad_norm": 0.08974138647317886,
"kl": 0.08209737141927083,
"learning_rate": 1e-06,
"loss": 0.0198,
"num_tokens": 31681096.0,
"reward": 0.8335268894831339,
"reward_std": 0.716159999370575,
"rewards/get_embedding_sim/mean": 0.4897768298784892,
"rewards/get_embedding_sim/std": 0.12030263990163803,
"rewards/reward_num_unique_chars/mean": 0.11484397575259209,
"rewards/reward_num_unique_chars/std": 0.30110697944959003,
"step": 117
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.040798611111111084,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 938.6666666666666,
"completions/mean_length": 202.7447967529297,
"completions/mean_terminated_length": 167.81108601888022,
"completions/min_length": 9.0,
"completions/min_terminated_length": 9.0,
"epoch": 1.8166351606805293,
"grad_norm": 0.10199436545372009,
"kl": 0.07517751057942708,
"learning_rate": 1e-06,
"loss": 0.0319,
"num_tokens": 32496610.0,
"reward": 0.860044519106547,
"reward_std": 0.7602864901224772,
"rewards/get_embedding_sim/mean": 0.4876486460367839,
"rewards/get_embedding_sim/std": 0.11599687735239665,
"rewards/reward_num_unique_chars/mean": 0.12465803200999896,
"rewards/reward_num_unique_chars/std": 0.3174656927585602,
"step": 120
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03125,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1003.0,
"completions/mean_length": 187.55816141764322,
"completions/mean_terminated_length": 160.59460957845053,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"epoch": 1.8620037807183365,
"grad_norm": 1.411887526512146,
"kl": 0.13691202799479166,
"learning_rate": 1e-06,
"loss": 0.0295,
"num_tokens": 33302501.0,
"reward": 0.8890740275382996,
"reward_std": 0.8625878095626831,
"rewards/get_embedding_sim/mean": 0.4724073112010956,
"rewards/get_embedding_sim/std": 0.11733246843020122,
"rewards/reward_num_unique_chars/mean": 0.13933624823888144,
"rewards/reward_num_unique_chars/std": 0.34191163380940753,
"step": 123
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025173611111111088,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 950.0,
"completions/mean_length": 171.25347900390625,
"completions/mean_terminated_length": 149.3338419596354,
"completions/min_length": 6.333333333333333,
"completions/min_terminated_length": 6.333333333333333,
"epoch": 1.9073724007561437,
"grad_norm": 0.10643448680639267,
"kl": 0.0879974365234375,
"learning_rate": 1e-06,
"loss": 0.0193,
"num_tokens": 34082265.0,
"reward": 1.0310540199279785,
"reward_std": 0.83027583360672,
"rewards/get_embedding_sim/mean": 0.4763664702574412,
"rewards/get_embedding_sim/std": 0.1186542958021164,
"rewards/reward_num_unique_chars/mean": 0.1909722164273262,
"rewards/reward_num_unique_chars/std": 0.3733387490113576,
"step": 126
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020833333333333332,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 1008.3333333333334,
"completions/mean_length": 175.5920206705729,
"completions/mean_terminated_length": 157.43394470214844,
"completions/min_length": 7.666666666666667,
"completions/min_terminated_length": 7.666666666666667,
"epoch": 1.9527410207939508,
"grad_norm": 0.083248071372509,
"kl": 0.07168070475260417,
"learning_rate": 1e-06,
"loss": 0.0234,
"num_tokens": 34874371.0,
"reward": 0.9281045397122701,
"reward_std": 0.9109238783518473,
"rewards/get_embedding_sim/mean": 0.4853961269060771,
"rewards/get_embedding_sim/std": 0.12320189674695332,
"rewards/reward_num_unique_chars/mean": 0.14780289431413016,
"rewards/reward_num_unique_chars/std": 0.35039229194323224,
"step": 129
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.026041666666666668,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 911.6666666666666,
"completions/mean_length": 177.00694783528647,
"completions/mean_terminated_length": 154.32052103678384,
"completions/min_length": 9.0,
"completions/min_terminated_length": 9.0,
"epoch": 1.998109640831758,
"grad_norm": 0.0692070946097374,
"kl": 0.07420603434244792,
"learning_rate": 1e-06,
"loss": 0.0336,
"num_tokens": 35655291.0,
"reward": 1.0533938805262248,
"reward_std": 0.9196257392565409,
"rewards/get_embedding_sim/mean": 0.4934980074564616,
"rewards/get_embedding_sim/std": 0.11738153547048569,
"rewards/reward_num_unique_chars/mean": 0.1887365331252416,
"rewards/reward_num_unique_chars/std": 0.3903753161430359,
"step": 132
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028645833333333332,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 963.3333333333334,
"completions/mean_length": 180.38281758626303,
"completions/mean_terminated_length": 155.4217987060547,
"completions/min_length": 7.666666666666667,
"completions/min_terminated_length": 7.666666666666667,
"epoch": 2.045368620037807,
"grad_norm": 0.1576370894908905,
"kl": 0.11295064290364583,
"learning_rate": 1e-06,
"loss": 0.0335,
"num_tokens": 36452916.0,
"reward": 0.9172398447990417,
"reward_std": 0.9673983256022135,
"rewards/get_embedding_sim/mean": 0.48755229512850445,
"rewards/get_embedding_sim/std": 0.11796744416157405,
"rewards/reward_num_unique_chars/mean": 0.14384527256091437,
"rewards/reward_num_unique_chars/std": 0.3497835397720337,
"step": 135
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.026909722222222248,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 972.3333333333334,
"completions/mean_length": 170.28646341959634,
"completions/mean_terminated_length": 146.62708536783853,
"completions/min_length": 5.333333333333333,
"completions/min_terminated_length": 5.333333333333333,
"epoch": 2.0907372400756143,
"grad_norm": 0.5436683893203735,
"kl": 0.1591796875,
"learning_rate": 1e-06,
"loss": 0.0342,
"num_tokens": 37224462.0,
"reward": 1.0363986889521282,
"reward_std": 0.9765956203142802,
"rewards/get_embedding_sim/mean": 0.5051485598087311,
"rewards/get_embedding_sim/std": 0.11746565749247868,
"rewards/reward_num_unique_chars/mean": 0.17818759878476462,
"rewards/reward_num_unique_chars/std": 0.3826761841773987,
"step": 138
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.038194444444444454,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 928.0,
"completions/mean_length": 174.79948933919272,
"completions/mean_terminated_length": 140.99752298990884,
"completions/min_length": 8.333333333333334,
"completions/min_terminated_length": 8.333333333333334,
"epoch": 2.1361058601134215,
"grad_norm": 0.11291619390249252,
"kl": 0.13877360026041666,
"learning_rate": 1e-06,
"loss": 0.0321,
"num_tokens": 38010471.0,
"reward": 0.936789353688558,
"reward_std": 0.8961972991625468,
"rewards/get_embedding_sim/mean": 0.49408095081647235,
"rewards/get_embedding_sim/std": 0.11985934029022853,
"rewards/reward_num_unique_chars/mean": 0.14794171353181204,
"rewards/reward_num_unique_chars/std": 0.3542039096355438,
"step": 141
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.055555555555555546,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 905.0,
"completions/mean_length": 189.78819783528647,
"completions/mean_terminated_length": 140.69151306152344,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"epoch": 2.1814744801512287,
"grad_norm": 0.12472045421600342,
"kl": 0.2596638997395833,
"learning_rate": 1e-06,
"loss": 0.0503,
"num_tokens": 38811923.0,
"reward": 1.0600279172261555,
"reward_std": 0.9520064989725748,
"rewards/get_embedding_sim/mean": 0.5053403675556183,
"rewards/get_embedding_sim/std": 0.12481692930062611,
"rewards/reward_num_unique_chars/mean": 0.18512474993864694,
"rewards/reward_num_unique_chars/std": 0.3818445106347402,
"step": 144
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03559027777777779,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 887.3333333333334,
"completions/mean_length": 186.22656758626303,
"completions/mean_terminated_length": 155.0603485107422,
"completions/min_length": 8.0,
"completions/min_terminated_length": 8.0,
"epoch": 2.226843100189036,
"grad_norm": 0.2647012174129486,
"kl": 0.14057413736979166,
"learning_rate": 1e-06,
"loss": 0.0445,
"num_tokens": 39616280.0,
"reward": 1.021846095720927,
"reward_std": 1.002595583597819,
"rewards/get_embedding_sim/mean": 0.48799189925193787,
"rewards/get_embedding_sim/std": 0.12229083478450775,
"rewards/reward_num_unique_chars/mean": 0.17855327824751535,
"rewards/reward_num_unique_chars/std": 0.3721735179424286,
"step": 147
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01996527777777779,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 975.6666666666666,
"completions/mean_length": 152.78299458821616,
"completions/mean_terminated_length": 135.05128479003906,
"completions/min_length": 7.666666666666667,
"completions/min_terminated_length": 7.666666666666667,
"epoch": 2.272211720226843,
"grad_norm": 0.07731039077043533,
"kl": 0.15080769856770834,
"learning_rate": 1e-06,
"loss": 0.0334,
"num_tokens": 40382110.0,
"reward": 1.0974433422088623,
"reward_std": 0.9521243373552958,
"rewards/get_embedding_sim/mean": 0.516714076201121,
"rewards/get_embedding_sim/std": 0.12718145549297333,
"rewards/reward_num_unique_chars/mean": 0.1947579632202784,
"rewards/reward_num_unique_chars/std": 0.3964957594871521,
"step": 150
},
{
"epoch": 2.31758034026465,
"grad_norm": 0.9586585760116577,
"learning_rate": 1e-06,
"loss": 0.0347,
"step": 153
},
{
"epoch": 2.31758034026465,
"eval_clip_ratio/high_max": 0.0,
"eval_clip_ratio/high_mean": 0.0,
"eval_clip_ratio/low_mean": 0.0,
"eval_clip_ratio/low_min": 0.0,
"eval_clip_ratio/region_mean": 0.0,
"eval_completions/clipped_ratio": 0.07328869047619047,
"eval_completions/max_length": 872.6071428571429,
"eval_completions/max_terminated_length": 616.6071428571429,
"eval_completions/mean_length": 177.7972524847303,
"eval_completions/mean_terminated_length": 111.63820842334202,
"eval_completions/min_length": 14.25,
"eval_completions/min_terminated_length": 14.25,
"eval_kl": 0.15661403111049108,
"eval_loss": 0.05365554988384247,
"eval_num_tokens": 41144925.0,
"eval_reward": 1.0100113941090447,
"eval_reward_std": 0.9766364488750696,
"eval_rewards/get_embedding_sim/mean": 0.5055470722062247,
"eval_rewards/get_embedding_sim/std": 0.10236791674313801,
"eval_rewards/reward_num_unique_chars/mean": 0.16864551766775548,
"eval_rewards/reward_num_unique_chars/std": 0.3155075231833117,
"eval_runtime": 5610.1574,
"eval_samples_per_second": 0.01,
"eval_steps_per_second": 0.0,
"step": 153
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028211805555555563,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 850.1666666666666,
"completions/mean_length": 165.75738271077475,
"completions/mean_terminated_length": 140.97229131062826,
"completions/min_length": 6.666666666666667,
"completions/min_terminated_length": 6.666666666666667,
"epoch": 2.3629489603024574,
"grad_norm": 0.13255949318408966,
"kl": 0.1971893310546875,
"learning_rate": 1e-06,
"loss": 0.037,
"num_tokens": 41941119.0,
"reward": 1.0157166123390198,
"reward_std": 0.9526964128017426,
"rewards/get_embedding_sim/mean": 0.5066019793351492,
"rewards/get_embedding_sim/std": 0.11097632969419162,
"rewards/reward_num_unique_chars/mean": 0.17024830107887587,
"rewards/reward_num_unique_chars/std": 0.37277790407339734,
"step": 156
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.026909722222222248,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 911.0,
"completions/mean_length": 145.26649729410806,
"completions/mean_terminated_length": 121.1592280069987,
"completions/min_length": 6.0,
"completions/min_terminated_length": 6.0,
"epoch": 2.4083175803402646,
"grad_norm": 0.10361862182617188,
"kl": 0.1372528076171875,
"learning_rate": 1e-06,
"loss": 0.0383,
"num_tokens": 42690418.0,
"reward": 1.2976791461308796,
"reward_std": 1.1292773286501567,
"rewards/get_embedding_sim/mean": 0.5060124099254608,
"rewards/get_embedding_sim/std": 0.11317289372285207,
"rewards/reward_num_unique_chars/mean": 0.26442377765973407,
"rewards/reward_num_unique_chars/std": 0.43167150020599365,
"step": 159
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.026909722222222248,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 838.3333333333334,
"completions/mean_length": 145.79688008626303,
"completions/mean_terminated_length": 121.46813710530598,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"epoch": 2.4536862003780717,
"grad_norm": 0.0873284786939621,
"kl": 0.17525736490885416,
"learning_rate": 1e-06,
"loss": 0.0382,
"num_tokens": 43439080.0,
"reward": 1.166001319885254,
"reward_std": 0.998184601465861,
"rewards/get_embedding_sim/mean": 0.5045428971449534,
"rewards/get_embedding_sim/std": 0.12551463643709818,
"rewards/reward_num_unique_chars/mean": 0.22076034545898438,
"rewards/reward_num_unique_chars/std": 0.40754825870196026,
"step": 162
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028645833333333332,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 890.0,
"completions/mean_length": 171.04340616861978,
"completions/mean_terminated_length": 145.85011291503906,
"completions/min_length": 8.0,
"completions/min_terminated_length": 8.0,
"epoch": 2.499054820415879,
"grad_norm": 0.0893421620130539,
"kl": 0.1564788818359375,
"learning_rate": 1e-06,
"loss": 0.039,
"num_tokens": 44225946.0,
"reward": 1.0508646965026855,
"reward_std": 1.0311030149459839,
"rewards/get_embedding_sim/mean": 0.49878130356470746,
"rewards/get_embedding_sim/std": 0.12252787003914516,
"rewards/reward_num_unique_chars/mean": 0.18418416877587637,
"rewards/reward_num_unique_chars/std": 0.38810135920842487,
"step": 165
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02256944444444442,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 859.3333333333334,
"completions/mean_length": 149.57465616861978,
"completions/mean_terminated_length": 129.38720703125,
"completions/min_length": 5.333333333333333,
"completions/min_terminated_length": 5.333333333333333,
"epoch": 2.544423440453686,
"grad_norm": 0.11376336216926575,
"kl": 0.17649332682291666,
"learning_rate": 1e-06,
"loss": 0.0415,
"num_tokens": 44975264.0,
"reward": 1.2923760414123535,
"reward_std": 1.15834375222524,
"rewards/get_embedding_sim/mean": 0.5293551087379456,
"rewards/get_embedding_sim/std": 0.12636979669332504,
"rewards/reward_num_unique_chars/mean": 0.255620613694191,
"rewards/reward_num_unique_chars/std": 0.4349779784679413,
"step": 168
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.021701388888888878,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 920.6666666666666,
"completions/mean_length": 151.2604217529297,
"completions/mean_terminated_length": 131.89202372233072,
"completions/min_length": 5.0,
"completions/min_terminated_length": 5.0,
"epoch": 2.5897920604914932,
"grad_norm": 0.1699657291173935,
"kl": 0.176025390625,
"learning_rate": 1e-06,
"loss": 0.0416,
"num_tokens": 45727292.0,
"reward": 1.1844958066940308,
"reward_std": 1.0816868146260579,
"rewards/get_embedding_sim/mean": 0.5048082073529562,
"rewards/get_embedding_sim/std": 0.11827733864386876,
"rewards/reward_num_unique_chars/mean": 0.22690473993619284,
"rewards/reward_num_unique_chars/std": 0.41608301798502606,
"step": 171
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013888888888888876,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 993.0,
"completions/mean_length": 140.05816650390625,
"completions/mean_terminated_length": 127.6211166381836,
"completions/min_length": 6.333333333333333,
"completions/min_terminated_length": 6.333333333333333,
"epoch": 2.6351606805293004,
"grad_norm": 0.08062685281038284,
"kl": 0.240142822265625,
"learning_rate": 1e-06,
"loss": 0.0377,
"num_tokens": 46478463.0,
"reward": 1.0848047733306885,
"reward_std": 1.0851068099339802,
"rewards/get_embedding_sim/mean": 0.5197005073229471,
"rewards/get_embedding_sim/std": 0.11266261339187622,
"rewards/reward_num_unique_chars/mean": 0.1886785626411438,
"rewards/reward_num_unique_chars/std": 0.39136550823847455,
"step": 174
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025173611111111088,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 857.0,
"completions/mean_length": 142.6154530843099,
"completions/mean_terminated_length": 119.86089833577473,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"epoch": 2.6805293005671076,
"grad_norm": 0.1051354631781578,
"kl": 0.22475179036458334,
"learning_rate": 1e-06,
"loss": 0.0389,
"num_tokens": 47225476.0,
"reward": 1.142371932665507,
"reward_std": 1.070401946703593,
"rewards/get_embedding_sim/mean": 0.49914271632830304,
"rewards/get_embedding_sim/std": 0.11159212638934453,
"rewards/reward_num_unique_chars/mean": 0.2148823787768682,
"rewards/reward_num_unique_chars/std": 0.40812622507413227,
"step": 177
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018229166666666668,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 900.0,
"completions/mean_length": 128.2951431274414,
"completions/mean_terminated_length": 111.62453969319661,
"completions/min_length": 7.333333333333333,
"completions/min_terminated_length": 7.333333333333333,
"epoch": 2.7258979206049148,
"grad_norm": 0.20546282827854156,
"kl": 0.283447265625,
"learning_rate": 1e-06,
"loss": 0.0403,
"num_tokens": 47955752.0,
"reward": 1.1857277949651082,
"reward_std": 1.1706757545471191,
"rewards/get_embedding_sim/mean": 0.513852725426356,
"rewards/get_embedding_sim/std": 0.12434107561906178,
"rewards/reward_num_unique_chars/mean": 0.23464342455069223,
"rewards/reward_num_unique_chars/std": 0.42252803842226666,
"step": 180
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01822916666666663,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 902.6666666666666,
"completions/mean_length": 130.4401067097982,
"completions/mean_terminated_length": 113.8047103881836,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"epoch": 2.7712665406427224,
"grad_norm": 0.08767585456371307,
"kl": 0.4050394694010417,
"learning_rate": 1e-06,
"loss": 0.0431,
"num_tokens": 48670211.0,
"reward": 1.2997503280639648,
"reward_std": 1.2144495646158855,
"rewards/get_embedding_sim/mean": 0.5237086117267609,
"rewards/get_embedding_sim/std": 0.1108636533220609,
"rewards/reward_num_unique_chars/mean": 0.2586805572112401,
"rewards/reward_num_unique_chars/std": 0.4370884597301483,
"step": 183
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.023437500000000038,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 890.0,
"completions/mean_length": 135.30035146077475,
"completions/mean_terminated_length": 113.9994608561198,
"completions/min_length": 5.666666666666667,
"completions/min_terminated_length": 5.666666666666667,
"epoch": 2.816635160680529,
"grad_norm": 0.3577604591846466,
"kl": 0.2592061360677083,
"learning_rate": 1e-06,
"loss": 0.0498,
"num_tokens": 49402749.0,
"reward": 1.2939318418502808,
"reward_std": 1.2120266358057659,
"rewards/get_embedding_sim/mean": 0.5204942027727762,
"rewards/get_embedding_sim/std": 0.11977454274892807,
"rewards/reward_num_unique_chars/mean": 0.2582635283470154,
"rewards/reward_num_unique_chars/std": 0.43749914566675824,
"step": 186
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.013888888888888876,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 812.0,
"completions/mean_length": 109.67014058430989,
"completions/mean_terminated_length": 96.77264912923177,
"completions/min_length": 6.666666666666667,
"completions/min_terminated_length": 6.666666666666667,
"epoch": 2.8620037807183367,
"grad_norm": 0.1368367075920105,
"kl": 0.3499857584635417,
"learning_rate": 1e-06,
"loss": 0.0381,
"num_tokens": 50108497.0,
"reward": 1.5523497263590496,
"reward_std": 1.214170217514038,
"rewards/get_embedding_sim/mean": 0.534120500087738,
"rewards/get_embedding_sim/std": 0.10954815397659938,
"rewards/reward_num_unique_chars/mean": 0.3397156894207001,
"rewards/reward_num_unique_chars/std": 0.46985835830370587,
"step": 189
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01909722222222221,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 838.6666666666666,
"completions/mean_length": 125.33333841959636,
"completions/mean_terminated_length": 107.79783884684245,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"epoch": 2.9073724007561434,
"grad_norm": 1.6821552515029907,
"kl": 0.4345550537109375,
"learning_rate": 1e-06,
"loss": 0.0423,
"num_tokens": 50831329.0,
"reward": 1.3425734440485637,
"reward_std": 1.191293756167094,
"rewards/get_embedding_sim/mean": 0.5404900709788004,
"rewards/get_embedding_sim/std": 0.11695743352174759,
"rewards/reward_num_unique_chars/mean": 0.26780080795288086,
"rewards/reward_num_unique_chars/std": 0.44234869877497357,
"step": 192
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.009548611111111124,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 820.0,
"completions/mean_length": 102.75347646077473,
"completions/mean_terminated_length": 93.89341735839844,
"completions/min_length": 8.666666666666666,
"completions/min_terminated_length": 8.666666666666666,
"epoch": 2.952741020793951,
"grad_norm": 0.11723087728023529,
"kl": 0.3047281901041667,
"learning_rate": 1e-06,
"loss": 0.0332,
"num_tokens": 51524645.0,
"reward": 1.6216003100077312,
"reward_std": 1.1864676475524902,
"rewards/get_embedding_sim/mean": 0.5434751510620117,
"rewards/get_embedding_sim/std": 0.12491280088822047,
"rewards/reward_num_unique_chars/mean": 0.359375,
"rewards/reward_num_unique_chars/std": 0.4740845561027527,
"step": 195
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01848659003831421,
"completions/max_length": 1024.0,
"completions/max_terminated_length": 832.0,
"completions/mean_length": 131.5273691813151,
"completions/mean_terminated_length": 114.69828796386719,
"completions/min_length": 6.666666666666667,
"completions/min_terminated_length": 6.666666666666667,
"epoch": 2.998109640831758,
"grad_norm": 0.10458555072546005,
"kl": 0.2809855143229167,
"learning_rate": 1e-06,
"loss": 0.0481,
"num_tokens": 52251674.0,
"reward": 1.4293763637542725,
"reward_std": 1.2686160405476887,
"rewards/get_embedding_sim/mean": 0.5335429906845093,
"rewards/get_embedding_sim/std": 0.11212129394213359,
"rewards/reward_num_unique_chars/mean": 0.29888081053892773,
"rewards/reward_num_unique_chars/std": 0.4557340343793233,
"step": 198
}
],
"logging_steps": 3,
"max_steps": 198,
"num_input_tokens_seen": 52251674,
"num_train_epochs": 3,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}