{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4999, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 937.8125, "completions/mean_terminated_length": 937.8125, "completions/min_length": 628.0, "completions/min_terminated_length": 628.0, "epoch": 0.00020004000800160032, "frac_reward_zero_std": 0.0, "grad_norm": 3.527628499401798, "kl": 0.0019626617431640625, "learning_rate": 0.0, "loss": -0.0356, "num_tokens": 38581.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9395993947982788, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06332202020797816, "rewards/wordcountpos_reward/raw_geo/std": 0.11430166382536866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.1377060745318193, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1151.75, "completions/mean_terminated_length": 1151.75, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.00040008001600320064, "frac_reward_zero_std": 0.0, "grad_norm": 3.3088173413126896, "kl": 0.0019207000732421875, "learning_rate": 2e-09, "loss": 0.0185, "num_tokens": 81577.0, "reward": 0.0, "reward_std": 0.945651113986969, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04572642141401159, "rewards/wordcountpos_reward/raw_geo/std": 0.13785616546096172, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05692750425533111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 1036.1875, "completions/mean_terminated_length": 1036.1875, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.000600120024004801, "frac_reward_zero_std": 0.0, "grad_norm": 3.2279400814834562, "kl": 0.0019130706787109375, "learning_rate": 4e-09, "loss": 0.008, "num_tokens": 124140.0, "reward": 2.9802322387695312e-08, "reward_std": 0.840162992477417, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015641604855250783, "rewards/wordcountpos_reward/raw_geo/std": 0.07693005917161888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1148.1875, "completions/mean_terminated_length": 1124.7333984375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.0008001600320064013, "frac_reward_zero_std": 0.0, "grad_norm": 3.0016251460966408, "kl": 0.0017414093017578125, "learning_rate": 6e-09, "loss": 0.0202, "num_tokens": 160679.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5394235253334045, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03780968722152463, "rewards/wordcountpos_reward/raw_geo/std": 0.06425658756004914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 1109.0, "completions/mean_terminated_length": 1109.0, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.0010002000400080016, "frac_reward_zero_std": 0.0, "grad_norm": 1.7236298250304891, "kl": 0.0007333755493164062, "learning_rate": 8e-09, "loss": -0.0022, "num_tokens": 203647.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9491308927536011, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06726112195430183, "rewards/wordcountpos_reward/raw_geo/std": 0.17384160321616854, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 5 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1275.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 977.0625, "completions/mean_terminated_length": 977.0625, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.001200240048009602, "frac_reward_zero_std": 0.0, "grad_norm": 2.7082803572559015, "kl": 0.0012769699096679688, "learning_rate": 1e-08, "loss": 0.0378, "num_tokens": 251608.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8895343542098999, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03377319362428406, "rewards/wordcountpos_reward/raw_geo/std": 0.09072124816242787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1205.4375, "completions/mean_terminated_length": 1107.25, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.0014002800560112022, "frac_reward_zero_std": 0.0, "grad_norm": 1.947326967663354, "kl": 0.00078582763671875, "learning_rate": 1.2e-08, "loss": 0.0185, "num_tokens": 304455.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0025546550750732, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.3640622136620862, "rewards/wordcountpos_reward/raw_geo/std": 0.21229947965348825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590966, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 7 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1096.125, "completions/mean_terminated_length": 1096.125, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.0016003200640128026, "frac_reward_zero_std": 0.0, "grad_norm": 3.592799510181172, "kl": 0.002285003662109375, "learning_rate": 1.4e-08, "loss": 0.005, "num_tokens": 345425.0, "reward": 0.0, "reward_std": 0.8836174011230469, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04584762897460425, "rewards/wordcountpos_reward/raw_geo/std": 0.20300709497527086, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1271.5, "completions/mean_terminated_length": 1218.769287109375, "completions/min_length": 1101.0, "completions/min_terminated_length": 1101.0, "epoch": 0.0018003600720144029, "frac_reward_zero_std": 0.0, "grad_norm": 2.9518801291571797, "kl": 0.001861572265625, "learning_rate": 1.6e-08, "loss": 0.0282, "num_tokens": 386905.0, "reward": 0.0, "reward_std": 0.8072527647018433, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0038624619297310207, "rewards/wordcountpos_reward/raw_geo/std": 0.0666643954266734, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923409, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 9 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 978.1875, "completions/mean_terminated_length": 978.1875, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.002000400080016003, "frac_reward_zero_std": 0.0, "grad_norm": 2.298998492558027, "kl": 0.0007266998291015625, "learning_rate": 1.8e-08, "loss": -0.0304, "num_tokens": 426236.0, "reward": 7.450580596923828e-09, "reward_std": 1.0550340414047241, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.031131642601040573, "rewards/wordcountpos_reward/raw_geo/std": 0.06226754416007678, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 1004.0625, "completions/mean_terminated_length": 1004.0625, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.0022004400880176033, "frac_reward_zero_std": 0.0, "grad_norm": 3.6545045637961344, "kl": 0.002613067626953125, "learning_rate": 2e-08, "loss": -0.0281, "num_tokens": 465741.0, "reward": 2.9802322387695312e-08, "reward_std": 0.602470338344574, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.092484229826523, "rewards/wordcountpos_reward/raw_geo/std": 0.10452240495740213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959602, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 11 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 919.3125, "completions/mean_terminated_length": 880.6000366210938, "completions/min_length": 610.0, "completions/min_terminated_length": 610.0, "epoch": 0.002400480096019204, "frac_reward_zero_std": 0.0, "grad_norm": 3.649675238428009, "kl": 0.0022182464599609375, "learning_rate": 2.2e-08, "loss": 0.0036, "num_tokens": 504690.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8824746608734131, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1325748640775109, "rewards/wordcountpos_reward/raw_geo/std": 0.08739242698581638, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921946, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1051.25, "completions/mean_terminated_length": 1021.3333740234375, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.002600520104020804, "frac_reward_zero_std": 0.0, "grad_norm": 3.776655556102875, "kl": 0.002254486083984375, "learning_rate": 2.4e-08, "loss": -0.0502, "num_tokens": 543934.0, "reward": 0.0, "reward_std": 1.0007752180099487, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09221717605913836, "rewards/wordcountpos_reward/raw_geo/std": 0.10661654246927132, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 13 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1260.0, "completions/max_terminated_length": 1260.0, "completions/mean_length": 989.1875, "completions/mean_terminated_length": 989.1875, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.0028005601120224045, "frac_reward_zero_std": 0.0, "grad_norm": 3.4045263885075374, "kl": 0.0021114349365234375, "learning_rate": 2.5999999999999998e-08, "loss": -0.0557, "num_tokens": 586153.0, "reward": 0.0, "reward_std": 0.9987242221832275, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07328233140544992, "rewards/wordcountpos_reward/raw_geo/std": 0.07773590256422544, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.1408308678285174, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 14 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1407.875, "completions/mean_terminated_length": 1366.0, "completions/min_length": 1205.0, "completions/min_terminated_length": 1205.0, "epoch": 0.0030006001200240046, "frac_reward_zero_std": 0.0, "grad_norm": 2.6637674347345706, "kl": 0.001651763916015625, "learning_rate": 2.8e-08, "loss": 0.0042, "num_tokens": 637407.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9598821401596069, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18890342937585533, "rewards/wordcountpos_reward/raw_geo/std": 0.08918939774881607, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 15 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1104.0, "completions/max_terminated_length": 1104.0, "completions/mean_length": 941.5625, "completions/mean_terminated_length": 941.5625, "completions/min_length": 728.0, "completions/min_terminated_length": 728.0, "epoch": 0.003200640128025605, "frac_reward_zero_std": 0.0, "grad_norm": 3.7902949247967315, "kl": 0.00201416015625, "learning_rate": 3e-08, "loss": -0.0211, "num_tokens": 680032.0, "reward": -5.960464477539063e-08, "reward_std": 0.6785845160484314, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18800039214593786, "rewards/wordcountpos_reward/raw_geo/std": 0.24516681115323696, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262936, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1181.875, "completions/mean_terminated_length": 1160.666748046875, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.0034006801360272052, "frac_reward_zero_std": 0.0, "grad_norm": 3.530423206396441, "kl": 0.0025482177734375, "learning_rate": 3.2e-08, "loss": 0.0551, "num_tokens": 727830.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5536831021308899, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13579335046363653, "rewards/wordcountpos_reward/raw_geo/std": 0.08194666431534924, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1276569477008451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 17 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1273.75, "completions/mean_terminated_length": 1241.4285888671875, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.0036007201440288058, "frac_reward_zero_std": 0.0, "grad_norm": 3.1057616181704906, "kl": 0.002300262451171875, "learning_rate": 3.4e-08, "loss": -0.0332, "num_tokens": 763338.0, "reward": 2.9802322387695312e-08, "reward_std": 0.930446982383728, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17500146785117016, "rewards/wordcountpos_reward/raw_geo/std": 0.07142286225477365, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 18 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1071.3125, "completions/mean_terminated_length": 1071.3125, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.003800760152030406, "frac_reward_zero_std": 0.0, "grad_norm": 3.4713607485961644, "kl": 0.00254058837890625, "learning_rate": 3.6e-08, "loss": 0.0183, "num_tokens": 809799.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9527382254600525, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04392219472145073, "rewards/wordcountpos_reward/raw_geo/std": 0.19282724057465933, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1377060745318193, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 19 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1179.5, "completions/mean_terminated_length": 1158.1334228515625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.004000800160032006, "frac_reward_zero_std": 0.0, "grad_norm": 3.1363202725296784, "kl": 0.0021038055419921875, "learning_rate": 3.7999999999999996e-08, "loss": 0.0005, "num_tokens": 862751.0, "reward": -1.862645149230957e-08, "reward_std": 1.060667634010315, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2620446166707721, "rewards/wordcountpos_reward/raw_geo/std": 0.21947933776825151, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1016.8125, "completions/mean_terminated_length": 1016.8125, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.004200840168033607, "frac_reward_zero_std": 0.0, "grad_norm": 3.3269915098256053, "kl": 0.002140045166015625, "learning_rate": 4e-08, "loss": -0.0082, "num_tokens": 906964.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8853997588157654, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09927080747029668, "rewards/wordcountpos_reward/raw_geo/std": 0.0805268646754127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1548595540529595, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 21 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1074.0, "completions/mean_terminated_length": 1074.0, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.004400880176035207, "frac_reward_zero_std": 0.0, "grad_norm": 3.567377949806492, "kl": 0.002490997314453125, "learning_rate": 4.2e-08, "loss": -0.0244, "num_tokens": 947620.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8644933104515076, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07925202687080059, "rewards/wordcountpos_reward/raw_geo/std": 0.0758299246895885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 22 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 987.125, "completions/mean_terminated_length": 987.125, "completions/min_length": 665.0, "completions/min_terminated_length": 665.0, "epoch": 0.004600920184036807, "frac_reward_zero_std": 0.0, "grad_norm": 4.208611808318897, "kl": 0.002841949462890625, "learning_rate": 4.4e-08, "loss": -0.0074, "num_tokens": 999062.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0427882671356201, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03526983305268585, "rewards/wordcountpos_reward/raw_geo/std": 0.04733718241866139, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12171612389003693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 23 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1274.4375, "completions/mean_terminated_length": 1171.9091796875, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.004800960192038408, "frac_reward_zero_std": 0.0, "grad_norm": 3.065859066921973, "kl": 0.002498626708984375, "learning_rate": 4.5999999999999995e-08, "loss": 0.0059, "num_tokens": 1048981.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6512476205825806, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11186146845153422, "rewards/wordcountpos_reward/raw_geo/std": 0.2219636274296806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.15864005379054394, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1390.0, "completions/mean_terminated_length": 1248.571533203125, "completions/min_length": 1130.0, "completions/min_terminated_length": 1130.0, "epoch": 0.005001000200040008, "frac_reward_zero_std": 0.0, "grad_norm": 2.0696160118781535, "kl": 0.0010509490966796875, "learning_rate": 4.8e-08, "loss": -0.0113, "num_tokens": 1095773.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0030694007873535, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14824852639004496, "rewards/wordcountpos_reward/raw_geo/std": 0.12155263262885863, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852978, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 25 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1109.75, "completions/mean_terminated_length": 1109.75, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.005201040208041608, "frac_reward_zero_std": 0.0, "grad_norm": 3.5808904483899155, "kl": 0.00238800048828125, "learning_rate": 5e-08, "loss": -0.0043, "num_tokens": 1138457.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9966350793838501, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05702305702017543, "rewards/wordcountpos_reward/raw_geo/std": 0.11436940928081135, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 26 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1195.5625, "completions/mean_terminated_length": 1175.2667236328125, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.0054010802160432084, "frac_reward_zero_std": 0.0, "grad_norm": 3.3502685015860543, "kl": 0.0023136138916015625, "learning_rate": 5.1999999999999996e-08, "loss": 0.0204, "num_tokens": 1179274.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9909127950668335, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07939162887681046, "rewards/wordcountpos_reward/raw_geo/std": 0.10428932455488632, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 27 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 927.5625, "completions/mean_terminated_length": 927.5625, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.005601120224044809, "frac_reward_zero_std": 0.0, "grad_norm": 3.497570534065496, "kl": 0.0017910003662109375, "learning_rate": 5.3999999999999994e-08, "loss": -0.0337, "num_tokens": 1228139.0, "reward": 0.0, "reward_std": 0.8223745822906494, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06698038716999538, "rewards/wordcountpos_reward/raw_geo/std": 0.465609639335268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1221.75, "completions/mean_terminated_length": 1221.75, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.0058011602320464095, "frac_reward_zero_std": 0.0, "grad_norm": 3.154653031446095, "kl": 0.00206756591796875, "learning_rate": 5.6e-08, "loss": -0.0105, "num_tokens": 1277127.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9979424476623535, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05153375274395769, "rewards/wordcountpos_reward/raw_geo/std": 0.08762850367344728, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 29 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1087.6875, "completions/mean_terminated_length": 1087.6875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.006001200240048009, "frac_reward_zero_std": 0.0, "grad_norm": 2.769965263951847, "kl": 0.0016193389892578125, "learning_rate": 5.8e-08, "loss": 0.0384, "num_tokens": 1319818.0, "reward": -5.960464477539063e-08, "reward_std": 0.653556227684021, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09990521663314691, "rewards/wordcountpos_reward/raw_geo/std": 0.1792054790335709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534189, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 30 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1236.875, "completions/mean_terminated_length": 1149.166748046875, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.00620124024804961, "frac_reward_zero_std": 0.0, "grad_norm": 2.552651474192458, "kl": 0.0018463134765625, "learning_rate": 6e-08, "loss": -0.0162, "num_tokens": 1371968.0, "reward": 0.0, "reward_std": 0.4588787257671356, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15320880545708118, "rewards/wordcountpos_reward/raw_geo/std": 0.1900779379839505, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.18130187635645245, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 31 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 976.375, "completions/mean_terminated_length": 941.4667358398438, "completions/min_length": 609.0, "completions/min_terminated_length": 609.0, "epoch": 0.00640128025605121, "frac_reward_zero_std": 0.0, "grad_norm": 3.909107751019465, "kl": 0.0029449462890625, "learning_rate": 6.2e-08, "loss": -0.079, "num_tokens": 1416894.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5451881289482117, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16118998121042316, "rewards/wordcountpos_reward/raw_geo/std": 0.18611854456660606, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7083333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1082.9375, "completions/mean_terminated_length": 1055.1334228515625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.006601320264052811, "frac_reward_zero_std": 0.0, "grad_norm": 2.5692180139121326, "kl": 0.00179290771484375, "learning_rate": 6.4e-08, "loss": -0.1914, "num_tokens": 1468141.0, "reward": -1.4901161193847656e-08, "reward_std": 0.882655143737793, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.037042266367277546, "rewards/wordcountpos_reward/raw_geo/std": 0.07329230609937216, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15776212754932312, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 33 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1064.0, "completions/max_terminated_length": 1064.0, "completions/mean_length": 958.1875, "completions/mean_terminated_length": 958.1875, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.0068013602720544105, "frac_reward_zero_std": 0.0, "grad_norm": 3.18763452766036, "kl": 0.002197265625, "learning_rate": 6.6e-08, "loss": -0.0013, "num_tokens": 1519768.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9640591144561768, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2373595299986506, "rewards/wordcountpos_reward/raw_geo/std": 0.2979919818880407, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 34 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1290.0, "completions/max_terminated_length": 1290.0, "completions/mean_length": 1034.5, "completions/mean_terminated_length": 1034.5, "completions/min_length": 765.0, "completions/min_terminated_length": 765.0, "epoch": 0.007001400280056011, "frac_reward_zero_std": 0.0, "grad_norm": 3.0402526128518494, "kl": 0.0015888214111328125, "learning_rate": 6.8e-08, "loss": -0.0131, "num_tokens": 1557584.0, "reward": 0.0, "reward_std": 0.7507257461547852, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1241539385675051, "rewards/wordcountpos_reward/raw_geo/std": 0.16992359920941463, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 35 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1234.375, "completions/mean_terminated_length": 1196.4285888671875, "completions/min_length": 1033.0, "completions/min_terminated_length": 1033.0, "epoch": 0.0072014402880576115, "frac_reward_zero_std": 0.0, "grad_norm": 2.670408929151118, "kl": 0.0012850761413574219, "learning_rate": 7e-08, "loss": -0.0298, "num_tokens": 1604230.0, "reward": 0.0, "reward_std": 0.7335447669029236, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0038398443027448494, "rewards/wordcountpos_reward/raw_geo/std": 0.133285123615264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 968.9375, "completions/mean_terminated_length": 968.9375, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.007401480296059212, "frac_reward_zero_std": 0.0, "grad_norm": 3.608250084251578, "kl": 0.0019969940185546875, "learning_rate": 7.2e-08, "loss": 0.0085, "num_tokens": 1636989.0, "reward": 0.0, "reward_std": 0.8306714296340942, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01242248777773371, "rewards/wordcountpos_reward/raw_geo/std": 0.046526047999266275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 37 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1364.75, "completions/mean_terminated_length": 1333.5384521484375, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.007601520304060812, "frac_reward_zero_std": 0.0, "grad_norm": 2.678383839115574, "kl": 0.0016307830810546875, "learning_rate": 7.399999999999999e-08, "loss": 0.0346, "num_tokens": 1687353.0, "reward": 0.0, "reward_std": 0.7261468172073364, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06788943902991768, "rewards/wordcountpos_reward/raw_geo/std": 0.07790495715118463, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923412, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 38 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 1038.875, "completions/mean_terminated_length": 1038.875, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.007801560312062412, "frac_reward_zero_std": 0.0, "grad_norm": 3.3785119569476842, "kl": 0.002109527587890625, "learning_rate": 7.599999999999999e-08, "loss": -0.0089, "num_tokens": 1730287.0, "reward": 2.9802322387695312e-08, "reward_std": 0.801064670085907, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08157536204093915, "rewards/wordcountpos_reward/raw_geo/std": 0.2274168733107394, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0958393717904348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 39 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1272.9375, "completions/mean_terminated_length": 1257.800048828125, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.008001600320064013, "frac_reward_zero_std": 0.0, "grad_norm": 2.627387832651442, "kl": 0.0014858245849609375, "learning_rate": 7.8e-08, "loss": -0.0217, "num_tokens": 1785742.0, "reward": 1.4901161193847656e-08, "reward_std": 1.061901330947876, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.42870538452307183, "rewards/wordcountpos_reward/raw_geo/std": 0.263985549175234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048495895206211566, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1415.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1158.75, "completions/mean_terminated_length": 1158.75, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.008201640328065612, "frac_reward_zero_std": 0.0, "grad_norm": 2.522238776465091, "kl": 0.0015172958374023438, "learning_rate": 8e-08, "loss": 0.0333, "num_tokens": 1816978.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6761976480484009, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.005377481638485537, "rewards/wordcountpos_reward/raw_geo/std": 0.0761497347320364, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 41 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1321.75, "completions/mean_terminated_length": 1240.727294921875, "completions/min_length": 1064.0, "completions/min_terminated_length": 1064.0, "epoch": 0.008401680336067214, "frac_reward_zero_std": 0.0, "grad_norm": 2.7279118100114075, "kl": 0.001800537109375, "learning_rate": 8.2e-08, "loss": 0.0261, "num_tokens": 1852822.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9055695533752441, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04670191211284185, "rewards/wordcountpos_reward/raw_geo/std": 0.03744635498643519, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05900408021045226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 42 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1237.25, "completions/mean_terminated_length": 1237.25, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.008601720344068814, "frac_reward_zero_std": 0.0, "grad_norm": 2.7283548320320885, "kl": 0.001575469970703125, "learning_rate": 8.4e-08, "loss": -0.0114, "num_tokens": 1906586.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0410590171813965, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06558977500215979, "rewards/wordcountpos_reward/raw_geo/std": 0.10948238148185759, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 43 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1128.625, "completions/mean_terminated_length": 1103.86669921875, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.008801760352070413, "frac_reward_zero_std": 0.0, "grad_norm": 2.941551231363272, "kl": 0.0016937255859375, "learning_rate": 8.599999999999999e-08, "loss": -0.016, "num_tokens": 1951372.0, "reward": 7.450580596923828e-09, "reward_std": 1.007286787033081, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.047986016667105835, "rewards/wordcountpos_reward/raw_geo/std": 0.1647848488669715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1229.3125, "completions/mean_terminated_length": 1190.6429443359375, "completions/min_length": 1095.0, "completions/min_terminated_length": 1095.0, "epoch": 0.009001800360072015, "frac_reward_zero_std": 0.0, "grad_norm": 2.7094244164897625, "kl": 0.0015301704406738281, "learning_rate": 8.8e-08, "loss": 0.0075, "num_tokens": 1997345.0, "reward": 0.0, "reward_std": 0.49579331278800964, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014463292505470429, "rewards/wordcountpos_reward/raw_geo/std": 0.06079814898465263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 45 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1120.0, "completions/max_terminated_length": 1120.0, "completions/mean_length": 969.75, "completions/mean_terminated_length": 969.75, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.009201840368073614, "frac_reward_zero_std": 0.0, "grad_norm": 3.534484823231903, "kl": 0.0018787384033203125, "learning_rate": 9e-08, "loss": 0.0016, "num_tokens": 2043125.0, "reward": 0.0, "reward_std": 1.0315972566604614, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01297143453701503, "rewards/wordcountpos_reward/raw_geo/std": 0.07119654640895512, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 46 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1019.5625, "completions/mean_terminated_length": 1019.5625, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.009401880376075216, "frac_reward_zero_std": 0.0, "grad_norm": 3.463254958479964, "kl": 0.0019664764404296875, "learning_rate": 9.199999999999999e-08, "loss": -0.0589, "num_tokens": 2082446.0, "reward": 0.0, "reward_std": 0.9849706888198853, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0843871901886455, "rewards/wordcountpos_reward/raw_geo/std": 0.09872120752646156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14089659985908765, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 47 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1258.9375, "completions/mean_terminated_length": 1242.86669921875, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.009601920384076815, "frac_reward_zero_std": 0.0, "grad_norm": 2.971385125688432, "kl": 0.0019779205322265625, "learning_rate": 9.4e-08, "loss": -0.0584, "num_tokens": 2132933.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6457346081733704, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16243873636014147, "rewards/wordcountpos_reward/raw_geo/std": 0.1255084780802457, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362769, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1047.0, "completions/max_terminated_length": 1047.0, "completions/mean_length": 942.4375, "completions/mean_terminated_length": 942.4375, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.009801960392078415, "frac_reward_zero_std": 0.0, "grad_norm": 3.3066241610973615, "kl": 0.001689910888671875, "learning_rate": 9.6e-08, "loss": -0.0143, "num_tokens": 2168836.0, "reward": 0.0, "reward_std": 0.5142208337783813, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20072281556082983, "rewards/wordcountpos_reward/raw_geo/std": 0.16580287533649615, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 49 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1248.6875, "completions/mean_terminated_length": 1190.6923828125, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.010002000400080016, "frac_reward_zero_std": 0.0, "grad_norm": 2.4640311998220077, "kl": 0.0012788772583007812, "learning_rate": 9.8e-08, "loss": 0.0142, "num_tokens": 2217639.0, "reward": 0.0, "reward_std": 0.8277969360351562, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16234292152381402, "rewards/wordcountpos_reward/raw_geo/std": 0.16072601120735497, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 50 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1001.5625, "completions/mean_terminated_length": 1001.5625, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.010202040408081616, "frac_reward_zero_std": 0.0, "grad_norm": 2.943966756593823, "kl": 0.001708984375, "learning_rate": 1e-07, "loss": -0.0133, "num_tokens": 2268408.0, "reward": -1.4901161193847656e-08, "reward_std": 0.870779812335968, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13615339609111313, "rewards/wordcountpos_reward/raw_geo/std": 0.1670364547720456, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0718795288428261, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 51 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1121.1875, "completions/mean_terminated_length": 1121.1875, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.010402080416083216, "frac_reward_zero_std": 0.0, "grad_norm": 3.58703209291831, "kl": 0.002361297607421875, "learning_rate": 1.0199999999999999e-07, "loss": -0.0171, "num_tokens": 2311075.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9936850070953369, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04702748511650438, "rewards/wordcountpos_reward/raw_geo/std": 0.07679069169034027, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1177.6875, "completions/mean_terminated_length": 1177.6875, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.010602120424084817, "frac_reward_zero_std": 0.0, "grad_norm": 3.451249976717016, "kl": 0.002536773681640625, "learning_rate": 1.0399999999999999e-07, "loss": -0.0061, "num_tokens": 2364150.0, "reward": 0.0, "reward_std": 0.8075898289680481, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2132490017495051, "rewards/wordcountpos_reward/raw_geo/std": 0.14247833058642853, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 53 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1113.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 980.1875, "completions/mean_terminated_length": 980.1875, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.010802160432086417, "frac_reward_zero_std": 0.0, "grad_norm": 3.7826596529915872, "kl": 0.002460479736328125, "learning_rate": 1.06e-07, "loss": 0.0153, "num_tokens": 2413217.0, "reward": 5.960464477539063e-08, "reward_std": 0.8012369275093079, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.011927149475152986, "rewards/wordcountpos_reward/raw_geo/std": 0.1738858621057102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.625, "rewards/wordcountpos_reward/raw_rule/std": 0.16843506277010845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 54 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 946.8125, "completions/mean_terminated_length": 946.8125, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.011002200440088018, "frac_reward_zero_std": 0.0, "grad_norm": 3.103545716226999, "kl": 0.001865386962890625, "learning_rate": 1.0799999999999999e-07, "loss": -0.0148, "num_tokens": 2453518.0, "reward": 0.0, "reward_std": 0.7228332757949829, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08228915216070509, "rewards/wordcountpos_reward/raw_geo/std": 0.14798677113522873, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 55 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 1060.625, "completions/mean_terminated_length": 1060.625, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.011202240448089618, "frac_reward_zero_std": 0.0, "grad_norm": 2.6791683165663094, "kl": 0.0012788772583007812, "learning_rate": 1.0999999999999999e-07, "loss": 0.002, "num_tokens": 2489912.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0635603666305542, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10841362943864043, "rewards/wordcountpos_reward/raw_geo/std": 0.07803994538825557, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.1413558682244267, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1207.5, "completions/mean_terminated_length": 1165.71435546875, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.011402280456091218, "frac_reward_zero_std": 0.0, "grad_norm": 2.846461532887261, "kl": 0.001705169677734375, "learning_rate": 1.12e-07, "loss": 0.0077, "num_tokens": 2531960.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8560097217559814, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04001479999848212, "rewards/wordcountpos_reward/raw_geo/std": 0.15671627229466206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14548768561863465, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 57 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1130.4375, "completions/mean_terminated_length": 962.45458984375, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.011602320464092819, "frac_reward_zero_std": 0.0, "grad_norm": 3.21901567309924, "kl": 0.0023040771484375, "learning_rate": 1.14e-07, "loss": -0.0568, "num_tokens": 2574079.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0615627765655518, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12349942196245112, "rewards/wordcountpos_reward/raw_geo/std": 0.1955286826790732, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039006, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 58 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1146.0, "completions/max_terminated_length": 1146.0, "completions/mean_length": 1022.0, "completions/mean_terminated_length": 1022.0, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.011802360472094419, "frac_reward_zero_std": 0.0, "grad_norm": 2.8956508751641175, "kl": 0.0021076202392578125, "learning_rate": 1.16e-07, "loss": 0.0151, "num_tokens": 2621215.0, "reward": 0.0, "reward_std": 0.7808208465576172, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03287686977835749, "rewards/wordcountpos_reward/raw_geo/std": 0.06689788514164367, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 59 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1242.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 1053.9375, "completions/mean_terminated_length": 1053.9375, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.012002400480096018, "frac_reward_zero_std": 0.0, "grad_norm": 3.2089278709869995, "kl": 0.001739501953125, "learning_rate": 1.1799999999999998e-07, "loss": -0.0092, "num_tokens": 2671142.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9888018369674683, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13551425466957498, "rewards/wordcountpos_reward/raw_geo/std": 0.10539356700345409, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.077817450199525, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1009.125, "completions/mean_terminated_length": 1009.125, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.01220244048809762, "frac_reward_zero_std": 0.0, "grad_norm": 3.400680270630255, "kl": 0.001728057861328125, "learning_rate": 1.2e-07, "loss": -0.0062, "num_tokens": 2713448.0, "reward": -5.960464477539063e-08, "reward_std": 0.443503201007843, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04001027402421124, "rewards/wordcountpos_reward/raw_geo/std": 0.1541240406982376, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045819, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 61 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1117.0, "completions/max_terminated_length": 1117.0, "completions/mean_length": 992.625, "completions/mean_terminated_length": 992.625, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.01240248049609922, "frac_reward_zero_std": 0.0, "grad_norm": 3.423086202681301, "kl": 0.0015954971313476562, "learning_rate": 1.2199999999999998e-07, "loss": -0.0236, "num_tokens": 2755442.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9328954219818115, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07611894928599701, "rewards/wordcountpos_reward/raw_geo/std": 0.06957971047329209, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 62 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 1068.6875, "completions/mean_terminated_length": 1068.6875, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.01260252050410082, "frac_reward_zero_std": 0.0, "grad_norm": 3.0369902335352212, "kl": 0.001537322998046875, "learning_rate": 1.24e-07, "loss": 0.005, "num_tokens": 2794957.0, "reward": 0.0, "reward_std": 0.8936575651168823, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.023587060511318154, "rewards/wordcountpos_reward/raw_geo/std": 0.039398007676348705, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 63 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 951.75, "completions/mean_terminated_length": 951.75, "completions/min_length": 592.0, "completions/min_terminated_length": 592.0, "epoch": 0.01280256051210242, "frac_reward_zero_std": 0.0, "grad_norm": 3.971300381599344, "kl": 0.00274658203125, "learning_rate": 1.26e-07, "loss": 0.0051, "num_tokens": 2835233.0, "reward": 7.450580596923828e-09, "reward_std": 1.0385068655014038, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.13802905831220774, "rewards/wordcountpos_reward/raw_geo/std": 0.1624069689135552, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886448, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1379.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1110.1875, "completions/mean_terminated_length": 1110.1875, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.01300260052010402, "frac_reward_zero_std": 0.0, "grad_norm": 3.123944987966859, "kl": 0.0017490386962890625, "learning_rate": 1.28e-07, "loss": 0.0286, "num_tokens": 2875796.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0527963638305664, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.023763320717478175, "rewards/wordcountpos_reward/raw_geo/std": 0.10936544193982757, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0843274042711568, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 65 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1208.75, "completions/mean_terminated_length": 1208.75, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.013202640528105622, "frac_reward_zero_std": 0.0, "grad_norm": 3.6583881912767886, "kl": 0.00267791748046875, "learning_rate": 1.3e-07, "loss": -0.048, "num_tokens": 2928976.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8107168078422546, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3147331285068067, "rewards/wordcountpos_reward/raw_geo/std": 0.2895872014821923, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1605545943838973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 66 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1028.375, "completions/mean_terminated_length": 1028.375, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.013402680536107221, "frac_reward_zero_std": 0.0, "grad_norm": 3.433802235170335, "kl": 0.002140045166015625, "learning_rate": 1.32e-07, "loss": -0.0363, "num_tokens": 2962630.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0483653545379639, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.046053671892313776, "rewards/wordcountpos_reward/raw_geo/std": 0.18605159095266544, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 67 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1174.125, "completions/mean_terminated_length": 1152.4000244140625, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.013602720544108821, "frac_reward_zero_std": 0.0, "grad_norm": 3.027913748087244, "kl": 0.0019359588623046875, "learning_rate": 1.34e-07, "loss": -0.0346, "num_tokens": 3008536.0, "reward": 5.960464477539063e-08, "reward_std": 0.7146797776222229, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06700316140097935, "rewards/wordcountpos_reward/raw_geo/std": 0.08654563346921694, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.061913918736689035, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1345.6875, "completions/mean_terminated_length": 1310.0770263671875, "completions/min_length": 1175.0, "completions/min_terminated_length": 1175.0, "epoch": 0.013802760552110422, "frac_reward_zero_std": 0.0, "grad_norm": 2.5735353978975524, "kl": 0.0017490386962890625, "learning_rate": 1.36e-07, "loss": -0.03, "num_tokens": 3054915.0, "reward": 0.0, "reward_std": 0.8835979700088501, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03346151161269269, "rewards/wordcountpos_reward/raw_geo/std": 0.11525915276451144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 69 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1177.8125, "completions/mean_terminated_length": 1156.3333740234375, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.014002800560112022, "frac_reward_zero_std": 0.0, "grad_norm": 1.7794689083003994, "kl": 0.000682830810546875, "learning_rate": 1.3800000000000002e-07, "loss": -0.0068, "num_tokens": 3102320.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8326509594917297, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.268538629523843, "rewards/wordcountpos_reward/raw_geo/std": 0.23431183749792447, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1403039029577766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 70 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1300.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1066.125, "completions/mean_terminated_length": 1066.125, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.014202840568113623, "frac_reward_zero_std": 0.0, "grad_norm": 3.381034537286166, "kl": 0.0021343231201171875, "learning_rate": 1.4e-07, "loss": -0.0082, "num_tokens": 3151290.0, "reward": 0.0, "reward_std": 0.6858251094818115, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05580609313469934, "rewards/wordcountpos_reward/raw_geo/std": 0.10850381029456178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 71 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1091.5625, "completions/mean_terminated_length": 1064.3333740234375, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.014402880576115223, "frac_reward_zero_std": 0.0, "grad_norm": 3.384692361783582, "kl": 0.002414703369140625, "learning_rate": 1.4199999999999997e-07, "loss": -0.0146, "num_tokens": 3201067.0, "reward": 0.0, "reward_std": 0.6135813593864441, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1346096225740474, "rewards/wordcountpos_reward/raw_geo/std": 0.06833792668093613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.725, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1454.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1253.0, "completions/mean_terminated_length": 1253.0, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.014602920584116823, "frac_reward_zero_std": 0.0, "grad_norm": 3.219061114628507, "kl": 0.00223541259765625, "learning_rate": 1.44e-07, "loss": -0.0127, "num_tokens": 3248835.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9600502848625183, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14092874197771316, "rewards/wordcountpos_reward/raw_geo/std": 0.13795048980720287, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982529, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 73 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 1054.6875, "completions/mean_terminated_length": 1054.6875, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.014802960592118424, "frac_reward_zero_std": 0.0, "grad_norm": 2.043617920670456, "kl": 0.0006856918334960938, "learning_rate": 1.4599999999999998e-07, "loss": 0.0021, "num_tokens": 3286950.0, "reward": -3.725290298461914e-09, "reward_std": 1.0264079570770264, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16163362017433497, "rewards/wordcountpos_reward/raw_geo/std": 0.2113384531220164, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 74 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1288.0625, "completions/mean_terminated_length": 1273.933349609375, "completions/min_length": 1119.0, "completions/min_terminated_length": 1119.0, "epoch": 0.015003000600120024, "frac_reward_zero_std": 0.0, "grad_norm": 2.463983506646221, "kl": 0.001415252685546875, "learning_rate": 1.4799999999999998e-07, "loss": -0.0151, "num_tokens": 3340703.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0527896881103516, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03372691713834564, "rewards/wordcountpos_reward/raw_geo/std": 0.12906610677594696, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 75 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1164.0, "completions/mean_terminated_length": 1164.0, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.015203040608121624, "frac_reward_zero_std": 0.0, "grad_norm": 2.5198558255828356, "kl": 0.0014171600341796875, "learning_rate": 1.5e-07, "loss": 0.0079, "num_tokens": 3378743.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5425284504890442, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10821390293511987, "rewards/wordcountpos_reward/raw_geo/std": 0.11504616127711416, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.061913918736689035, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1382.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1070.9375, "completions/mean_terminated_length": 1070.9375, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.015403080616123225, "frac_reward_zero_std": 0.0, "grad_norm": 2.846890204409714, "kl": 0.0019207000732421875, "learning_rate": 1.5199999999999998e-07, "loss": 0.0047, "num_tokens": 3424662.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7099167108535767, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03401851122698256, "rewards/wordcountpos_reward/raw_geo/std": 0.06980181730189261, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298553, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 77 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1154.875, "completions/mean_terminated_length": 998.0, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.015603120624124825, "frac_reward_zero_std": 0.0, "grad_norm": 2.558696511042277, "kl": 0.001270294189453125, "learning_rate": 1.54e-07, "loss": 0.0024, "num_tokens": 3471036.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9273952841758728, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.048057120449891015, "rewards/wordcountpos_reward/raw_geo/std": 0.08042709328156214, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 78 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1019.875, "completions/mean_terminated_length": 1019.875, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.015803160632126424, "frac_reward_zero_std": 0.0, "grad_norm": 2.809161072413743, "kl": 0.0020294189453125, "learning_rate": 1.56e-07, "loss": -0.0282, "num_tokens": 3519450.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0270659923553467, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.24448510858828998, "rewards/wordcountpos_reward/raw_geo/std": 0.19179764342402397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.6791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 79 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1299.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 1009.875, "completions/mean_terminated_length": 1009.875, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.016003200640128026, "frac_reward_zero_std": 0.0, "grad_norm": 3.742546948619, "kl": 0.00244903564453125, "learning_rate": 1.5799999999999999e-07, "loss": -0.0304, "num_tokens": 3560952.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0620218515396118, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.284031328458443, "rewards/wordcountpos_reward/raw_geo/std": 0.18176904643064618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16771890063326086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1104.5, "completions/mean_terminated_length": 1104.5, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.016203240648129627, "frac_reward_zero_std": 0.0, "grad_norm": 3.392097370317782, "kl": 0.00218963623046875, "learning_rate": 1.6e-07, "loss": -0.0438, "num_tokens": 3603272.0, "reward": -2.9802322387695312e-08, "reward_std": 0.550621509552002, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16509166218888163, "rewards/wordcountpos_reward/raw_geo/std": 0.1574318965551874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 81 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1004.4375, "completions/mean_terminated_length": 1004.4375, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.016403280656131225, "frac_reward_zero_std": 0.0, "grad_norm": 1.3767283140722126, "kl": 0.00044357776641845703, "learning_rate": 1.62e-07, "loss": -0.0374, "num_tokens": 3642079.0, "reward": -1.4901161193847656e-08, "reward_std": 0.996279239654541, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08207001229598464, "rewards/wordcountpos_reward/raw_geo/std": 0.12002407014395852, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 82 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1216.875, "completions/mean_terminated_length": 1151.5384521484375, "completions/min_length": 562.0, "completions/min_terminated_length": 562.0, "epoch": 0.016603320664132826, "frac_reward_zero_std": 0.0, "grad_norm": 3.356900404526726, "kl": 0.002445220947265625, "learning_rate": 1.64e-07, "loss": -0.0508, "num_tokens": 3685389.0, "reward": -3.725290298461914e-09, "reward_std": 1.0197583436965942, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.12103700061719323, "rewards/wordcountpos_reward/raw_geo/std": 0.0625620120191825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 83 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1195.625, "completions/mean_terminated_length": 1152.1429443359375, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.016803360672134428, "frac_reward_zero_std": 0.0, "grad_norm": 2.7888688409351525, "kl": 0.0016803741455078125, "learning_rate": 1.66e-07, "loss": 0.0424, "num_tokens": 3735783.0, "reward": 0.0, "reward_std": 0.7441692352294922, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06404987898105448, "rewards/wordcountpos_reward/raw_geo/std": 0.09885025710959537, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1025.1875, "completions/mean_terminated_length": 1025.1875, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.017003400680136026, "frac_reward_zero_std": 0.0, "grad_norm": 3.8198736260166233, "kl": 0.002620697021484375, "learning_rate": 1.68e-07, "loss": -0.0309, "num_tokens": 3786546.0, "reward": 0.0, "reward_std": 0.4674009680747986, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2120522081238164, "rewards/wordcountpos_reward/raw_geo/std": 0.2643144000897039, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 85 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1192.0, "completions/max_terminated_length": 1192.0, "completions/mean_length": 972.5625, "completions/mean_terminated_length": 972.5625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.017203440688137627, "frac_reward_zero_std": 0.0, "grad_norm": 3.4465610412465018, "kl": 0.002399444580078125, "learning_rate": 1.7000000000000001e-07, "loss": -0.2055, "num_tokens": 3819195.0, "reward": 0.0, "reward_std": 0.45157063007354736, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01212638696753808, "rewards/wordcountpos_reward/raw_geo/std": 0.0989730972160819, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.166888740937943, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 86 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1032.25, "completions/mean_terminated_length": 1032.25, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.01740348069613923, "frac_reward_zero_std": 0.0, "grad_norm": 3.1029799146423547, "kl": 0.0016727447509765625, "learning_rate": 1.7199999999999998e-07, "loss": -0.0318, "num_tokens": 3859951.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8753600120544434, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14956301911332787, "rewards/wordcountpos_reward/raw_geo/std": 0.3036430780861878, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857661, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 87 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1096.3125, "completions/mean_terminated_length": 1096.3125, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.017603520704140826, "frac_reward_zero_std": 0.0, "grad_norm": 2.7406071941723593, "kl": 0.0018787384033203125, "learning_rate": 1.7399999999999997e-07, "loss": 0.01, "num_tokens": 3899604.0, "reward": 0.0, "reward_std": 0.7270439863204956, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3070543166390343, "rewards/wordcountpos_reward/raw_geo/std": 0.2784756707169464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10602235962635778, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1312.1875, "completions/mean_terminated_length": 1285.357177734375, "completions/min_length": 1139.0, "completions/min_terminated_length": 1139.0, "epoch": 0.017803560712142428, "frac_reward_zero_std": 0.0, "grad_norm": 2.7153769752823584, "kl": 0.0014514923095703125, "learning_rate": 1.76e-07, "loss": 0.0336, "num_tokens": 3951111.0, "reward": 2.9802322387695312e-08, "reward_std": 0.659126877784729, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05353880335974331, "rewards/wordcountpos_reward/raw_geo/std": 0.3364238241998972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 89 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 997.75, "completions/mean_terminated_length": 926.0000610351562, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.01800360072014403, "frac_reward_zero_std": 0.0, "grad_norm": 3.4629171711837587, "kl": 0.0025177001953125, "learning_rate": 1.7799999999999998e-07, "loss": -0.1032, "num_tokens": 3998235.0, "reward": 1.862645149230957e-08, "reward_std": 1.0609157085418701, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04898928131914003, "rewards/wordcountpos_reward/raw_geo/std": 0.0715433718391716, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.23094010767585033, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1108.6875, "completions/mean_terminated_length": 1108.6875, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.01820364072814563, "frac_reward_zero_std": 0.0, "grad_norm": 2.7100983398781335, "kl": 0.0019359588623046875, "learning_rate": 1.8e-07, "loss": 0.0265, "num_tokens": 4031694.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0323246717453003, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02602604761413077, "rewards/wordcountpos_reward/raw_geo/std": 0.08310000990370793, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 91 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 995.3125, "completions/mean_terminated_length": 995.3125, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.01840368073614723, "frac_reward_zero_std": 0.0, "grad_norm": 3.9656884576577482, "kl": 0.00266265869140625, "learning_rate": 1.82e-07, "loss": 0.0011, "num_tokens": 4082723.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8962907791137695, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2710285801090401, "rewards/wordcountpos_reward/raw_geo/std": 0.09927527501427448, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16815997674172586, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 1032.875, "completions/mean_terminated_length": 1001.7333984375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.01860372074414883, "frac_reward_zero_std": 0.0, "grad_norm": 3.257865049459383, "kl": 0.0021953582763671875, "learning_rate": 1.8399999999999998e-07, "loss": -0.0235, "num_tokens": 4134305.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9222845435142517, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02851932756208324, "rewards/wordcountpos_reward/raw_geo/std": 0.0983720202569023, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923412, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 93 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1109.0, "completions/max_terminated_length": 1109.0, "completions/mean_length": 952.9375, "completions/mean_terminated_length": 952.9375, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.01880376075215043, "frac_reward_zero_std": 0.0, "grad_norm": 2.591861929202499, "kl": 0.0010938644409179688, "learning_rate": 1.86e-07, "loss": -0.0082, "num_tokens": 4183336.0, "reward": 1.4901161193847656e-08, "reward_std": 0.927165150642395, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0654109965184039, "rewards/wordcountpos_reward/raw_geo/std": 0.03883543969044345, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590965, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 94 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1205.8125, "completions/mean_terminated_length": 1163.7857666015625, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.01900380076015203, "frac_reward_zero_std": 0.0, "grad_norm": 2.843068809361827, "kl": 0.00186920166015625, "learning_rate": 1.88e-07, "loss": 0.0082, "num_tokens": 4223765.0, "reward": 0.0, "reward_std": 0.8880987167358398, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11054466481578845, "rewards/wordcountpos_reward/raw_geo/std": 0.16882626208618992, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 95 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1432.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1079.9375, "completions/mean_terminated_length": 1079.9375, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.01920384076815363, "frac_reward_zero_std": 0.0, "grad_norm": 3.5443043426009035, "kl": 0.00235748291015625, "learning_rate": 1.8999999999999998e-07, "loss": -0.027, "num_tokens": 4266828.0, "reward": 0.0, "reward_std": 1.0388721227645874, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12401498536400447, "rewards/wordcountpos_reward/raw_geo/std": 0.059548757863214494, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1440.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1246.4375, "completions/mean_terminated_length": 1246.4375, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.019403880776155232, "frac_reward_zero_std": 0.0, "grad_norm": 2.9745956866672834, "kl": 0.00156402587890625, "learning_rate": 1.92e-07, "loss": -0.0069, "num_tokens": 4302859.0, "reward": -1.862645149230957e-08, "reward_std": 1.0609395503997803, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.3570642970214304, "rewards/wordcountpos_reward/raw_geo/std": 0.2923082116181851, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 97 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1272.1875, "completions/mean_terminated_length": 1135.5, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.01960392078415683, "frac_reward_zero_std": 0.0, "grad_norm": 2.1934913728541017, "kl": 0.001129150390625, "learning_rate": 1.94e-07, "loss": -0.0397, "num_tokens": 4354878.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4104628562927246, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09081082787625226, "rewards/wordcountpos_reward/raw_geo/std": 0.09172777927884528, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512346, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 98 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1160.0, "completions/max_terminated_length": 1160.0, "completions/mean_length": 1017.3125, "completions/mean_terminated_length": 1017.3125, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.01980396079215843, "frac_reward_zero_std": 0.0, "grad_norm": 2.855960015372327, "kl": 0.0013818740844726562, "learning_rate": 1.96e-07, "loss": -0.009, "num_tokens": 4396003.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6896897554397583, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04171223591558619, "rewards/wordcountpos_reward/raw_geo/std": 0.04287635771819565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 99 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 940.875, "completions/mean_terminated_length": 940.875, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.020004000800160033, "frac_reward_zero_std": 0.0, "grad_norm": 3.4658420723462515, "kl": 0.002307891845703125, "learning_rate": 1.98e-07, "loss": -0.019, "num_tokens": 4432953.0, "reward": 0.0, "reward_std": 1.0164649486541748, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10926581236518279, "rewards/wordcountpos_reward/raw_geo/std": 0.10683221073875608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.06206328908341753, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 885.5, "completions/mean_terminated_length": 844.5333862304688, "completions/min_length": 521.0, "completions/min_terminated_length": 521.0, "epoch": 0.02020404080816163, "frac_reward_zero_std": 0.0, "grad_norm": 3.4682767329592936, "kl": 0.0017375946044921875, "learning_rate": 2e-07, "loss": 0.0544, "num_tokens": 4471337.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8995528221130371, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.041884321407224076, "rewards/wordcountpos_reward/raw_geo/std": 0.0520283672232931, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504183, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1149.4375, "completions/mean_terminated_length": 1149.4375, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.020404080816163232, "frac_reward_zero_std": 0.0, "grad_norm": 2.384941895438001, "kl": 0.0011348724365234375, "learning_rate": 2.02e-07, "loss": -0.0119, "num_tokens": 4511488.0, "reward": 0.0, "reward_std": 0.6648247838020325, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03173980269404662, "rewards/wordcountpos_reward/raw_geo/std": 0.10554141997322652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1060.0, "completions/max_terminated_length": 1060.0, "completions/mean_length": 795.875, "completions/mean_terminated_length": 795.875, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.020604120824164834, "frac_reward_zero_std": 0.0, "grad_norm": 3.459564942395811, "kl": 0.0019626617431640625, "learning_rate": 2.0399999999999997e-07, "loss": -0.0201, "num_tokens": 4548334.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9772127866744995, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06644742460073368, "rewards/wordcountpos_reward/raw_geo/std": 0.043228380952663556, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1074.25, "completions/mean_terminated_length": 1074.25, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.02080416083216643, "frac_reward_zero_std": 0.0, "grad_norm": 3.3682393095675396, "kl": 0.002368927001953125, "learning_rate": 2.06e-07, "loss": 0.0185, "num_tokens": 4585890.0, "reward": 0.0, "reward_std": 0.885869026184082, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11426874211834061, "rewards/wordcountpos_reward/raw_geo/std": 0.23478524357029001, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.11674600476945511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1253.1875, "completions/mean_terminated_length": 1217.9285888671875, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.021004200840168033, "frac_reward_zero_std": 0.0, "grad_norm": 2.7597115654260542, "kl": 0.0016937255859375, "learning_rate": 2.0799999999999998e-07, "loss": 0.0091, "num_tokens": 4640645.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8757137060165405, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18459559015688365, "rewards/wordcountpos_reward/raw_geo/std": 0.1457668092119728, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.6416666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1179.5, "completions/mean_terminated_length": 1179.5, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.021204240848169634, "frac_reward_zero_std": 0.0, "grad_norm": 2.3564989642109744, "kl": 0.001285552978515625, "learning_rate": 2.0999999999999997e-07, "loss": -0.0079, "num_tokens": 4683549.0, "reward": -1.4901161193847656e-08, "reward_std": 1.021355390548706, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1013983261764013, "rewards/wordcountpos_reward/raw_geo/std": 0.06767396516784616, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952499, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1086.25, "completions/mean_terminated_length": 1086.25, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.021404280856171236, "frac_reward_zero_std": 0.0, "grad_norm": 2.499781247330737, "kl": 0.0012226104736328125, "learning_rate": 2.12e-07, "loss": -0.0114, "num_tokens": 4725681.0, "reward": -3.725290298461914e-09, "reward_std": 0.9972322583198547, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1419042801655097, "rewards/wordcountpos_reward/raw_geo/std": 0.09633503898715794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1306.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 1004.1875, "completions/mean_terminated_length": 1004.1875, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.021604320864172834, "frac_reward_zero_std": 0.0, "grad_norm": 2.807298263356802, "kl": 0.001522064208984375, "learning_rate": 2.1399999999999998e-07, "loss": -0.0023, "num_tokens": 4762684.0, "reward": 0.0, "reward_std": 0.7315050959587097, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.019604161675491012, "rewards/wordcountpos_reward/raw_geo/std": 0.06171842709267681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1154.0, "completions/max_terminated_length": 1154.0, "completions/mean_length": 956.875, "completions/mean_terminated_length": 956.875, "completions/min_length": 577.0, "completions/min_terminated_length": 577.0, "epoch": 0.021804360872174435, "frac_reward_zero_std": 0.0, "grad_norm": 2.6275841668135316, "kl": 0.0013952255249023438, "learning_rate": 2.1599999999999998e-07, "loss": -0.054, "num_tokens": 4792770.0, "reward": 0.0, "reward_std": 0.9902944564819336, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1414058756587871, "rewards/wordcountpos_reward/raw_geo/std": 0.10904737544819008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1484.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1121.5625, "completions/mean_terminated_length": 1121.5625, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.022004400880176037, "frac_reward_zero_std": 0.0, "grad_norm": 3.6076313920224785, "kl": 0.00240325927734375, "learning_rate": 2.18e-07, "loss": -0.0511, "num_tokens": 4833083.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0530446767807007, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03558353354252604, "rewards/wordcountpos_reward/raw_geo/std": 0.056397172435382066, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1258.0, "completions/max_terminated_length": 1258.0, "completions/mean_length": 976.625, "completions/mean_terminated_length": 976.625, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.022204440888177635, "frac_reward_zero_std": 0.0, "grad_norm": 3.3235755539185035, "kl": 0.0019683837890625, "learning_rate": 2.1999999999999998e-07, "loss": 0.046, "num_tokens": 4864725.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8679072856903076, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12339057280874857, "rewards/wordcountpos_reward/raw_geo/std": 0.08453155713155466, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1270.0, "completions/mean_terminated_length": 1132.0, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.022404480896179236, "frac_reward_zero_std": 0.0, "grad_norm": 2.1322034674072152, "kl": 0.0014905929565429688, "learning_rate": 2.22e-07, "loss": -0.1409, "num_tokens": 4917213.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8368362784385681, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1491872710799866, "rewards/wordcountpos_reward/raw_geo/std": 0.20379599415714353, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1704025734460517, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1200.125, "completions/mean_terminated_length": 1130.923095703125, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.022604520904180837, "frac_reward_zero_std": 0.0, "grad_norm": 3.5549011210358445, "kl": 0.0025691986083984375, "learning_rate": 2.24e-07, "loss": -0.0612, "num_tokens": 4967023.0, "reward": -1.4901161193847656e-08, "reward_std": 1.015423059463501, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11918412858870782, "rewards/wordcountpos_reward/raw_geo/std": 0.13533358411349417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1045.0, "completions/max_terminated_length": 1045.0, "completions/mean_length": 936.375, "completions/mean_terminated_length": 936.375, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.022804560912182435, "frac_reward_zero_std": 0.0, "grad_norm": 3.3389039395237767, "kl": 0.0020427703857421875, "learning_rate": 2.2599999999999999e-07, "loss": 0.0183, "num_tokens": 5015005.0, "reward": 0.0, "reward_std": 0.8978585600852966, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.009347811527754012, "rewards/wordcountpos_reward/raw_geo/std": 0.1411939361251286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1113.625, "completions/mean_terminated_length": 1113.625, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.023004600920184037, "frac_reward_zero_std": 0.0, "grad_norm": 3.2878493435850324, "kl": 0.002399444580078125, "learning_rate": 2.28e-07, "loss": 0.0356, "num_tokens": 5066695.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8935801982879639, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11597057825403508, "rewards/wordcountpos_reward/raw_geo/std": 0.11845162069206665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 970.125, "completions/mean_terminated_length": 970.125, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.023204640928185638, "frac_reward_zero_std": 0.0, "grad_norm": 3.5267684813119535, "kl": 0.0021381378173828125, "learning_rate": 2.3e-07, "loss": 0.01, "num_tokens": 5107385.0, "reward": 0.0, "reward_std": 0.8746201992034912, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.015695084144887007, "rewards/wordcountpos_reward/raw_geo/std": 0.2061006028584386, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1109.3125, "completions/mean_terminated_length": 1109.3125, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.023404680936187236, "frac_reward_zero_std": 0.0, "grad_norm": 3.0749471193826405, "kl": 0.002002716064453125, "learning_rate": 2.32e-07, "loss": -0.0146, "num_tokens": 5149702.0, "reward": 0.0, "reward_std": 0.6566903591156006, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011287498376811759, "rewards/wordcountpos_reward/raw_geo/std": 0.08044698658902417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 951.3125, "completions/mean_terminated_length": 951.3125, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.023604720944188837, "frac_reward_zero_std": 0.0, "grad_norm": 3.1064462779133564, "kl": 0.0017032623291015625, "learning_rate": 2.34e-07, "loss": 0.0172, "num_tokens": 5185859.0, "reward": 0.0, "reward_std": 0.8671146631240845, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04180583506795414, "rewards/wordcountpos_reward/raw_geo/std": 0.062227449788775945, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1177.5, "completions/mean_terminated_length": 1177.5, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.02380476095219044, "frac_reward_zero_std": 0.0, "grad_norm": 2.3813904851095384, "kl": 0.0012912750244140625, "learning_rate": 2.3599999999999997e-07, "loss": -0.014, "num_tokens": 5227563.0, "reward": 0.0, "reward_std": 0.732681393623352, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16515597229797943, "rewards/wordcountpos_reward/raw_geo/std": 0.16025598405421565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1140.4375, "completions/mean_terminated_length": 1140.4375, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.024004800960192037, "frac_reward_zero_std": 0.0, "grad_norm": 3.3862005299374833, "kl": 0.002170562744140625, "learning_rate": 2.38e-07, "loss": -0.0357, "num_tokens": 5273410.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0341181755065918, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1599049300454976, "rewards/wordcountpos_reward/raw_geo/std": 0.1869016424688019, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 947.5625, "completions/mean_terminated_length": 947.5625, "completions/min_length": 557.0, "completions/min_terminated_length": 557.0, "epoch": 0.024204840968193638, "frac_reward_zero_std": 0.0, "grad_norm": 1.6435117375732204, "kl": 0.0004982054233551025, "learning_rate": 2.4e-07, "loss": -0.0361, "num_tokens": 5309651.0, "reward": 7.450580596923828e-09, "reward_std": 1.0177175998687744, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03212280122158982, "rewards/wordcountpos_reward/raw_geo/std": 0.03682415859142722, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505425, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1053.375, "completions/mean_terminated_length": 1053.375, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.02440488097619524, "frac_reward_zero_std": 0.0, "grad_norm": 3.55504416660261, "kl": 0.002532958984375, "learning_rate": 2.4199999999999997e-07, "loss": -0.0464, "num_tokens": 5342577.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8219430446624756, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03377315111105618, "rewards/wordcountpos_reward/raw_geo/std": 0.0751326064845707, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1374.4375, "completions/mean_terminated_length": 1276.77783203125, "completions/min_length": 1180.0, "completions/min_terminated_length": 1180.0, "epoch": 0.02460492098419684, "frac_reward_zero_std": 0.0, "grad_norm": 2.4842196179738147, "kl": 0.0013828277587890625, "learning_rate": 2.4399999999999996e-07, "loss": -0.0344, "num_tokens": 5395160.0, "reward": 0.0, "reward_std": 1.0364223718643188, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.008041307139657523, "rewards/wordcountpos_reward/raw_geo/std": 0.12155709441453373, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970786, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1115.5625, "completions/mean_terminated_length": 987.4166870117188, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.02480496099219844, "frac_reward_zero_std": 0.0, "grad_norm": 3.663760255484724, "kl": 0.002788543701171875, "learning_rate": 2.46e-07, "loss": -0.0061, "num_tokens": 5433233.0, "reward": 0.0, "reward_std": 0.9057458639144897, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.35074900357142696, "rewards/wordcountpos_reward/raw_geo/std": 0.27420162699006634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575906, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1104.0, "completions/max_terminated_length": 1104.0, "completions/mean_length": 1055.3125, "completions/mean_terminated_length": 1055.3125, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.02500500100020004, "frac_reward_zero_std": 0.0, "grad_norm": 2.110325750198092, "kl": 0.0006413459777832031, "learning_rate": 2.48e-07, "loss": -0.0124, "num_tokens": 5476478.0, "reward": -7.450580596923828e-09, "reward_std": 0.9084052443504333, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.32454356594644873, "rewards/wordcountpos_reward/raw_geo/std": 0.12864399465208828, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1228.5625, "completions/mean_terminated_length": 1138.0833740234375, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.02520504100820164, "frac_reward_zero_std": 0.0, "grad_norm": 2.725180393157286, "kl": 0.0017547607421875, "learning_rate": 2.5e-07, "loss": 0.0204, "num_tokens": 5521255.0, "reward": 0.0, "reward_std": 0.9177630543708801, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09296925946984191, "rewards/wordcountpos_reward/raw_geo/std": 0.09994415368604596, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1028.875, "completions/mean_terminated_length": 997.4667358398438, "completions/min_length": 626.0, "completions/min_terminated_length": 626.0, "epoch": 0.02540508101620324, "frac_reward_zero_std": 0.0, "grad_norm": 3.533129468799629, "kl": 0.002468109130859375, "learning_rate": 2.52e-07, "loss": 0.0167, "num_tokens": 5565925.0, "reward": 7.450580596923828e-09, "reward_std": 1.0681498050689697, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.005731092753961403, "rewards/wordcountpos_reward/raw_geo/std": 0.04030005129155748, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1210295341978484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1168.0, "completions/max_terminated_length": 1168.0, "completions/mean_length": 1089.9375, "completions/mean_terminated_length": 1089.9375, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.02560512102420484, "frac_reward_zero_std": 0.0, "grad_norm": 2.614935433969565, "kl": 0.00139617919921875, "learning_rate": 2.5399999999999997e-07, "loss": -0.0108, "num_tokens": 5606364.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9422404170036316, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.003325936766975837, "rewards/wordcountpos_reward/raw_geo/std": 0.0676704561930054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16278820596099708, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1149.625, "completions/mean_terminated_length": 1126.2667236328125, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.025805161032206442, "frac_reward_zero_std": 0.0, "grad_norm": 3.4055742079877773, "kl": 0.00235748291015625, "learning_rate": 2.56e-07, "loss": -0.0034, "num_tokens": 5650382.0, "reward": 4.470348358154297e-08, "reward_std": 0.9219162464141846, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06994234102620896, "rewards/wordcountpos_reward/raw_geo/std": 0.10535251869751974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455328, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1123.125, "completions/mean_terminated_length": 1098.0, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.02600520104020804, "frac_reward_zero_std": 0.0, "grad_norm": 3.7806052064194846, "kl": 0.002964019775390625, "learning_rate": 2.58e-07, "loss": -0.0223, "num_tokens": 5698360.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9927940368652344, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03625284063439026, "rewards/wordcountpos_reward/raw_geo/std": 0.07598196410270074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1165.5, "completions/mean_terminated_length": 1143.2000732421875, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.026205241048209642, "frac_reward_zero_std": 0.0, "grad_norm": 3.3544193262357798, "kl": 0.002727508544921875, "learning_rate": 2.6e-07, "loss": 0.0272, "num_tokens": 5743120.0, "reward": 3.725290298461914e-09, "reward_std": 0.9749528169631958, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.10519852249015271, "rewards/wordcountpos_reward/raw_geo/std": 0.07211044110903524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563383, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1248.0, "completions/mean_terminated_length": 1231.2000732421875, "completions/min_length": 1050.0, "completions/min_terminated_length": 1050.0, "epoch": 0.026405281056211243, "frac_reward_zero_std": 0.0, "grad_norm": 2.5524734540426284, "kl": 0.0016803741455078125, "learning_rate": 2.62e-07, "loss": -0.0441, "num_tokens": 5787936.0, "reward": -3.725290298461914e-08, "reward_std": 1.047700047492981, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.017072103381075506, "rewards/wordcountpos_reward/raw_geo/std": 0.09429196039589322, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 916.875, "completions/mean_terminated_length": 916.875, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.02660532106421284, "frac_reward_zero_std": 0.0, "grad_norm": 3.984809904853974, "kl": 0.002796173095703125, "learning_rate": 2.64e-07, "loss": -0.0386, "num_tokens": 5827406.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8076682686805725, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22494880857242078, "rewards/wordcountpos_reward/raw_geo/std": 0.06743884582965824, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 954.0, "completions/mean_terminated_length": 954.0, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.026805361072214443, "frac_reward_zero_std": 0.0, "grad_norm": 3.499095946392049, "kl": 0.002285003662109375, "learning_rate": 2.66e-07, "loss": 0.0048, "num_tokens": 5857294.0, "reward": 7.450580596923828e-09, "reward_std": 0.9633626937866211, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.010387670910388783, "rewards/wordcountpos_reward/raw_geo/std": 0.0709327108022106, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382576, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 940.9375, "completions/mean_terminated_length": 940.9375, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.027005401080216044, "frac_reward_zero_std": 0.0, "grad_norm": 3.1062492040873897, "kl": 0.0018215179443359375, "learning_rate": 2.68e-07, "loss": -0.0052, "num_tokens": 5890693.0, "reward": 0.0, "reward_std": 0.7280793190002441, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.019208240754161037, "rewards/wordcountpos_reward/raw_geo/std": 0.06828392423240585, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1022.8125, "completions/mean_terminated_length": 1022.8125, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.027205441088217642, "frac_reward_zero_std": 0.0, "grad_norm": 3.2603917076292315, "kl": 0.0019207000732421875, "learning_rate": 2.7e-07, "loss": -0.0168, "num_tokens": 5921066.0, "reward": 0.0, "reward_std": 0.8826114535331726, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02657969863034007, "rewards/wordcountpos_reward/raw_geo/std": 0.07274257114292472, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1082.1875, "completions/mean_terminated_length": 1082.1875, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.027405481096219243, "frac_reward_zero_std": 0.0, "grad_norm": 2.7859647768529237, "kl": 0.0013942718505859375, "learning_rate": 2.72e-07, "loss": 0.0081, "num_tokens": 5961941.0, "reward": 0.0, "reward_std": 0.9826708436012268, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07373379288402647, "rewards/wordcountpos_reward/raw_geo/std": 0.15970077738730076, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0925962962222252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1249.9375, "completions/mean_terminated_length": 1166.5833740234375, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.027605521104220845, "frac_reward_zero_std": 0.0, "grad_norm": 2.936763347656408, "kl": 0.0021915435791015625, "learning_rate": 2.74e-07, "loss": 0.0078, "num_tokens": 6007172.0, "reward": 0.0, "reward_std": 1.0613622665405273, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007903899007037299, "rewards/wordcountpos_reward/raw_geo/std": 0.08952140105427299, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.0910840068085298, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1131.5, "completions/mean_terminated_length": 1078.857177734375, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.027805561112222446, "frac_reward_zero_std": 0.0, "grad_norm": 2.7921426756357075, "kl": 0.0017223358154296875, "learning_rate": 2.7600000000000004e-07, "loss": 0.007, "num_tokens": 6053004.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8758231997489929, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0253874822157782, "rewards/wordcountpos_reward/raw_geo/std": 0.2014196689822482, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116195, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1023.8125, "completions/mean_terminated_length": 1023.8125, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.028005601120224044, "frac_reward_zero_std": 0.0, "grad_norm": 3.4174609368665783, "kl": 0.001895904541015625, "learning_rate": 2.7800000000000003e-07, "loss": 0.0346, "num_tokens": 6085441.0, "reward": 0.0, "reward_std": 0.6017365455627441, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0644692289877486, "rewards/wordcountpos_reward/raw_geo/std": 0.11806502235160024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13270686158262923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1024.5, "completions/mean_terminated_length": 992.800048828125, "completions/min_length": 639.0, "completions/min_terminated_length": 639.0, "epoch": 0.028205641128225645, "frac_reward_zero_std": 0.0, "grad_norm": 3.104866529478396, "kl": 0.002140045166015625, "learning_rate": 2.8e-07, "loss": 0.0069, "num_tokens": 6126377.0, "reward": 5.960464477539063e-08, "reward_std": 0.6350376009941101, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09587675568359075, "rewards/wordcountpos_reward/raw_geo/std": 0.09502655784770331, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.1519624710005487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1040.9375, "completions/mean_terminated_length": 1040.9375, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.028405681136227247, "frac_reward_zero_std": 0.0, "grad_norm": 2.1131392202631027, "kl": 0.001201629638671875, "learning_rate": 2.8199999999999996e-07, "loss": -0.0125, "num_tokens": 6172440.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0225037336349487, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08417944072302122, "rewards/wordcountpos_reward/raw_geo/std": 0.09469170277743412, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503964, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1235.875, "completions/mean_terminated_length": 1030.4444580078125, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.028605721144228845, "frac_reward_zero_std": 0.0, "grad_norm": 2.6560744420850417, "kl": 0.0016994476318359375, "learning_rate": 2.8399999999999995e-07, "loss": -0.0364, "num_tokens": 6230382.0, "reward": 0.0, "reward_std": 0.555607795715332, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07065391504281567, "rewards/wordcountpos_reward/raw_geo/std": 0.2377454948908988, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1210295341978484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1203.1875, "completions/mean_terminated_length": 1160.7857666015625, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.028805761152230446, "frac_reward_zero_std": 0.0, "grad_norm": 2.7001440810266546, "kl": 0.0014753341674804688, "learning_rate": 2.8599999999999994e-07, "loss": 0.0262, "num_tokens": 6277681.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9347323179244995, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0726886643579835, "rewards/wordcountpos_reward/raw_geo/std": 0.1141527658611117, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1142.1875, "completions/mean_terminated_length": 1118.3333740234375, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.029005801160232048, "frac_reward_zero_std": 0.0, "grad_norm": 3.3034450193456477, "kl": 0.0023651123046875, "learning_rate": 2.88e-07, "loss": -0.0314, "num_tokens": 6322252.0, "reward": 0.0, "reward_std": 0.7847909927368164, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05470724248289191, "rewards/wordcountpos_reward/raw_geo/std": 0.0917857853186929, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.16487930490266262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1090.0, "completions/mean_terminated_length": 1090.0, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.029205841168233646, "frac_reward_zero_std": 0.0, "grad_norm": 3.565351156924563, "kl": 0.002532958984375, "learning_rate": 2.9e-07, "loss": -0.024, "num_tokens": 6366404.0, "reward": -7.450580596923828e-09, "reward_std": 1.0665879249572754, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1619085009815739, "rewards/wordcountpos_reward/raw_geo/std": 0.11228047692914898, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15341785110291775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1108.0, "completions/max_terminated_length": 1108.0, "completions/mean_length": 891.4375, "completions/mean_terminated_length": 891.4375, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.029405881176235247, "frac_reward_zero_std": 0.0, "grad_norm": 3.044216507303204, "kl": 0.0013246536254882812, "learning_rate": 2.9199999999999997e-07, "loss": -0.0204, "num_tokens": 6393883.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9801445007324219, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09128864151802746, "rewards/wordcountpos_reward/raw_geo/std": 0.09928565252239704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0859586463881842, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1300.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1156.875, "completions/mean_terminated_length": 1156.875, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.02960592118423685, "frac_reward_zero_std": 0.0, "grad_norm": 2.667885227131188, "kl": 0.001422882080078125, "learning_rate": 2.9399999999999996e-07, "loss": -0.0217, "num_tokens": 6428065.0, "reward": 0.0, "reward_std": 0.6467468738555908, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21520958934934514, "rewards/wordcountpos_reward/raw_geo/std": 0.2574197964156713, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1160.4375, "completions/mean_terminated_length": 1160.4375, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.029805961192238446, "frac_reward_zero_std": 0.0, "grad_norm": 3.368116829078309, "kl": 0.002346038818359375, "learning_rate": 2.9599999999999995e-07, "loss": -0.0449, "num_tokens": 6477560.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9497330188751221, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05884865624844422, "rewards/wordcountpos_reward/raw_geo/std": 0.2553102118738447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13214750456578045, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1148.375, "completions/mean_terminated_length": 1148.375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.030006001200240048, "frac_reward_zero_std": 0.0, "grad_norm": 2.840269778480782, "kl": 0.0018157958984375, "learning_rate": 2.98e-07, "loss": -0.1826, "num_tokens": 6517790.0, "reward": 3.725290298461914e-08, "reward_std": 0.9691453576087952, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07166437652560242, "rewards/wordcountpos_reward/raw_geo/std": 0.05517467197768663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.19163043135739746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1157.125, "completions/mean_terminated_length": 1157.125, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.03020604120824165, "frac_reward_zero_std": 0.0, "grad_norm": 3.241205600832832, "kl": 0.002277374267578125, "learning_rate": 3e-07, "loss": -0.0051, "num_tokens": 6566200.0, "reward": 0.0, "reward_std": 0.8757079839706421, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1252726546747316, "rewards/wordcountpos_reward/raw_geo/std": 0.14860622568769824, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1028.125, "completions/mean_terminated_length": 1028.125, "completions/min_length": 707.0, "completions/min_terminated_length": 707.0, "epoch": 0.030406081216243247, "frac_reward_zero_std": 0.0, "grad_norm": 3.1304438832959685, "kl": 0.0014595985412597656, "learning_rate": 3.02e-07, "loss": -0.0447, "num_tokens": 6609938.0, "reward": 0.0, "reward_std": 0.8762164115905762, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.00892900632361902, "rewards/wordcountpos_reward/raw_geo/std": 0.14022686054727213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1002773930432755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1191.5, "completions/mean_terminated_length": 1170.933349609375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.03060612122424485, "frac_reward_zero_std": 0.0, "grad_norm": 3.4387916614209164, "kl": 0.002582550048828125, "learning_rate": 3.0399999999999997e-07, "loss": -0.0471, "num_tokens": 6654362.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8346088528633118, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08430786717237182, "rewards/wordcountpos_reward/raw_geo/std": 0.06411529205876527, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 942.1875, "completions/mean_terminated_length": 905.0000610351562, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.03080616123224645, "frac_reward_zero_std": 0.0, "grad_norm": 3.5831376857759043, "kl": 0.00226593017578125, "learning_rate": 3.0599999999999996e-07, "loss": -0.0395, "num_tokens": 6689349.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0417293310165405, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011742284117961408, "rewards/wordcountpos_reward/raw_geo/std": 0.08746125385088048, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1090.8125, "completions/mean_terminated_length": 1032.357177734375, "completions/min_length": 704.0, "completions/min_terminated_length": 704.0, "epoch": 0.03100620124024805, "frac_reward_zero_std": 0.0, "grad_norm": 3.386953741973585, "kl": 0.002246856689453125, "learning_rate": 3.08e-07, "loss": 0.0181, "num_tokens": 6730746.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0403060913085938, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.005257365669998601, "rewards/wordcountpos_reward/raw_geo/std": 0.02506443591115819, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.0739118594202782, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1019.4375, "completions/mean_terminated_length": 1019.4375, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.03120624124824965, "frac_reward_zero_std": 0.0, "grad_norm": 2.8624964229202554, "kl": 0.001758575439453125, "learning_rate": 3.1e-07, "loss": 0.0073, "num_tokens": 6780465.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7841784954071045, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12933117903437513, "rewards/wordcountpos_reward/raw_geo/std": 0.19474236112635995, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252813, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1146.3125, "completions/mean_terminated_length": 1146.3125, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.03140628125625125, "frac_reward_zero_std": 0.0, "grad_norm": 2.93832291066197, "kl": 0.0017795562744140625, "learning_rate": 3.12e-07, "loss": 0.0335, "num_tokens": 6822374.0, "reward": 0.0, "reward_std": 0.8305565118789673, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12869431953514113, "rewards/wordcountpos_reward/raw_geo/std": 0.11311646795891614, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1114.3125, "completions/mean_terminated_length": 1114.3125, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.03160632126425285, "frac_reward_zero_std": 0.0, "grad_norm": 3.566016521175704, "kl": 0.00229644775390625, "learning_rate": 3.14e-07, "loss": -0.0388, "num_tokens": 6873835.0, "reward": 0.0, "reward_std": 0.6947319507598877, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.22582155981137864, "rewards/wordcountpos_reward/raw_geo/std": 0.17251187513536334, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12641788434189793, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1247.125, "completions/mean_terminated_length": 1230.2667236328125, "completions/min_length": 1039.0, "completions/min_terminated_length": 1039.0, "epoch": 0.03180636127225445, "frac_reward_zero_std": 0.0, "grad_norm": 3.2330647170447384, "kl": 0.002361297607421875, "learning_rate": 3.1599999999999997e-07, "loss": -0.0321, "num_tokens": 6915309.0, "reward": 0.0, "reward_std": 0.6039950847625732, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09379096119745979, "rewards/wordcountpos_reward/raw_geo/std": 0.1418673062147658, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1196.1875, "completions/mean_terminated_length": 1126.076904296875, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.03200640128025605, "frac_reward_zero_std": 0.0, "grad_norm": 2.7486087367504455, "kl": 0.0015363693237304688, "learning_rate": 3.18e-07, "loss": -0.015, "num_tokens": 6972576.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9035705327987671, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011589700336663985, "rewards/wordcountpos_reward/raw_geo/std": 0.03088870013934026, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.18519259244445038, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1106.75, "completions/mean_terminated_length": 1050.571533203125, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.03220644128825765, "frac_reward_zero_std": 0.0, "grad_norm": 2.575819086441216, "kl": 0.0009403228759765625, "learning_rate": 3.2e-07, "loss": 0.007, "num_tokens": 7025012.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0658233165740967, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10180951553021601, "rewards/wordcountpos_reward/raw_geo/std": 0.08388929266352826, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1127.0, "completions/max_terminated_length": 1127.0, "completions/mean_length": 1049.75, "completions/mean_terminated_length": 1049.75, "completions/min_length": 1003.0, "completions/min_terminated_length": 1003.0, "epoch": 0.032406481296259254, "frac_reward_zero_std": 0.0, "grad_norm": 2.162613193517412, "kl": 0.00119781494140625, "learning_rate": 3.22e-07, "loss": -0.0038, "num_tokens": 7059800.0, "reward": 0.0, "reward_std": 0.9184061884880066, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.035449787671120364, "rewards/wordcountpos_reward/raw_geo/std": 0.1655324974661436, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.107496769977314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 1038.25, "completions/mean_terminated_length": 1038.25, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.03260652130426085, "frac_reward_zero_std": 0.0, "grad_norm": 1.726300481116887, "kl": 0.0005960464477539062, "learning_rate": 3.24e-07, "loss": -0.0106, "num_tokens": 7102684.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5779698491096497, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07824030467868008, "rewards/wordcountpos_reward/raw_geo/std": 0.1217785746553571, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1051.0, "completions/max_terminated_length": 1051.0, "completions/mean_length": 914.1875, "completions/mean_terminated_length": 914.1875, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.03280656131226245, "frac_reward_zero_std": 0.0, "grad_norm": 1.8882417408775412, "kl": 0.00045561790466308594, "learning_rate": 3.26e-07, "loss": -0.001, "num_tokens": 7129199.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6361270546913147, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019847480950444092, "rewards/wordcountpos_reward/raw_geo/std": 0.05674052777081613, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1230.8125, "completions/mean_terminated_length": 1108.45458984375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.033006601320264055, "frac_reward_zero_std": 0.0, "grad_norm": 3.2836735737424263, "kl": 0.00185394287109375, "learning_rate": 3.28e-07, "loss": 0.002, "num_tokens": 7183932.0, "reward": 0.0, "reward_std": 0.4567263424396515, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0749356086203115, "rewards/wordcountpos_reward/raw_geo/std": 0.15793615083402637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823633, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1028.0625, "completions/mean_terminated_length": 960.6428833007812, "completions/min_length": 584.0, "completions/min_terminated_length": 584.0, "epoch": 0.03320664132826565, "frac_reward_zero_std": 0.0, "grad_norm": 3.414077559439006, "kl": 0.002269744873046875, "learning_rate": 3.3e-07, "loss": -0.0331, "num_tokens": 7226757.0, "reward": 0.0, "reward_std": 0.8215432167053223, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15710710270210468, "rewards/wordcountpos_reward/raw_geo/std": 0.15804379716155278, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1016.0, "completions/max_terminated_length": 1016.0, "completions/mean_length": 804.0625, "completions/mean_terminated_length": 804.0625, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.03340668133626725, "frac_reward_zero_std": 0.0, "grad_norm": 3.841176918343631, "kl": 0.00176239013671875, "learning_rate": 3.32e-07, "loss": -0.0192, "num_tokens": 7253814.0, "reward": 0.0, "reward_std": 0.9507547616958618, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.015403673599066161, "rewards/wordcountpos_reward/raw_geo/std": 0.07322449785572387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 931.5625, "completions/mean_terminated_length": 931.5625, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.033606721344268856, "frac_reward_zero_std": 0.0, "grad_norm": 4.114088148721571, "kl": 0.003040313720703125, "learning_rate": 3.34e-07, "loss": -0.0069, "num_tokens": 7284879.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9934753179550171, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.023421842390939075, "rewards/wordcountpos_reward/raw_geo/std": 0.031201541495178103, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1175.1875, "completions/mean_terminated_length": 1175.1875, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.033806761352270454, "frac_reward_zero_std": 0.0, "grad_norm": 2.692831376482583, "kl": 0.0016002655029296875, "learning_rate": 3.36e-07, "loss": -0.0321, "num_tokens": 7325962.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8348179459571838, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0040702192219965, "rewards/wordcountpos_reward/raw_geo/std": 0.07941516655990437, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1035.125, "completions/mean_terminated_length": 1004.1333618164062, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.03400680136027205, "frac_reward_zero_std": 0.0, "grad_norm": 3.6407281749099107, "kl": 0.00213623046875, "learning_rate": 3.38e-07, "loss": -0.0169, "num_tokens": 7376980.0, "reward": 0.0, "reward_std": 0.3154091238975525, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0761640152028266, "rewards/wordcountpos_reward/raw_geo/std": 0.3022664167575318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.17126976771553507, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1150.0, "completions/mean_terminated_length": 1126.666748046875, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.034206841368273656, "frac_reward_zero_std": 0.0, "grad_norm": 3.458684178068551, "kl": 0.002552032470703125, "learning_rate": 3.4000000000000003e-07, "loss": 0.0312, "num_tokens": 7422108.0, "reward": -7.450580596923828e-09, "reward_std": 1.017713189125061, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.003107375680797565, "rewards/wordcountpos_reward/raw_geo/std": 0.13157277958765579, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7041666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.141878925953186, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 993.875, "completions/mean_terminated_length": 993.875, "completions/min_length": 710.0, "completions/min_terminated_length": 710.0, "epoch": 0.034406881376275254, "frac_reward_zero_std": 0.0, "grad_norm": 2.878871516473372, "kl": 0.0018367767333984375, "learning_rate": 3.42e-07, "loss": -0.0647, "num_tokens": 7468522.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0539871454238892, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10589587437566766, "rewards/wordcountpos_reward/raw_geo/std": 0.10047178629357259, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.1520233900132184, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1118.5625, "completions/mean_terminated_length": 1093.1334228515625, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.03460692138427685, "frac_reward_zero_std": 0.0, "grad_norm": 3.1081152431224957, "kl": 0.0016803741455078125, "learning_rate": 3.4399999999999996e-07, "loss": -0.0216, "num_tokens": 7508555.0, "reward": -5.960464477539063e-08, "reward_std": 0.7262141108512878, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015564385874576205, "rewards/wordcountpos_reward/raw_geo/std": 0.15203653049736218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1229.4375, "completions/mean_terminated_length": 1229.4375, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.03480696139227846, "frac_reward_zero_std": 0.0, "grad_norm": 3.0440136356529304, "kl": 0.0018520355224609375, "learning_rate": 3.4599999999999995e-07, "loss": -0.005, "num_tokens": 7555594.0, "reward": 0.0, "reward_std": 0.7898181676864624, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.017639132492135577, "rewards/wordcountpos_reward/raw_geo/std": 0.23086481665634997, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338703, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1298.6875, "completions/mean_terminated_length": 1097.375, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.035007001400280055, "frac_reward_zero_std": 0.0, "grad_norm": 2.113065970436417, "kl": 0.0011692047119140625, "learning_rate": 3.4799999999999994e-07, "loss": -0.04, "num_tokens": 7613973.0, "reward": 0.0, "reward_std": 0.8858467936515808, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07545944925166885, "rewards/wordcountpos_reward/raw_geo/std": 0.167531101117853, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195308, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1053.3125, "completions/mean_terminated_length": 1053.3125, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.03520704140828165, "frac_reward_zero_std": 0.0, "grad_norm": 3.249743672170572, "kl": 0.0018157958984375, "learning_rate": 3.5e-07, "loss": -0.0234, "num_tokens": 7654746.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9871081113815308, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18222974852849785, "rewards/wordcountpos_reward/raw_geo/std": 0.2153333859668719, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1374.25, "completions/mean_terminated_length": 1298.800048828125, "completions/min_length": 1132.0, "completions/min_terminated_length": 1132.0, "epoch": 0.03540708141628326, "frac_reward_zero_std": 0.0, "grad_norm": 2.9997439451733117, "kl": 0.002349853515625, "learning_rate": 3.52e-07, "loss": 0.0008, "num_tokens": 7708446.0, "reward": -2.9802322387695312e-08, "reward_std": 0.49946585297584534, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.040972830221256785, "rewards/wordcountpos_reward/raw_geo/std": 0.07527936855353783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1099.125, "completions/mean_terminated_length": 1072.4000244140625, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.035607121424284856, "frac_reward_zero_std": 0.0, "grad_norm": 2.8033839667518556, "kl": 0.001506805419921875, "learning_rate": 3.5399999999999997e-07, "loss": 0.0316, "num_tokens": 7749808.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8020362854003906, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12622919683024994, "rewards/wordcountpos_reward/raw_geo/std": 0.11355599317983416, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1080980350662545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1139.6875, "completions/mean_terminated_length": 1139.6875, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.03580716143228646, "frac_reward_zero_std": 0.0, "grad_norm": 3.511053791986106, "kl": 0.002407073974609375, "learning_rate": 3.5599999999999996e-07, "loss": -0.022, "num_tokens": 7785219.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9724830389022827, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006258823342861898, "rewards/wordcountpos_reward/raw_geo/std": 0.031881013999506956, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1191.0, "completions/max_terminated_length": 1191.0, "completions/mean_length": 1044.125, "completions/mean_terminated_length": 1044.125, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.03600720144028806, "frac_reward_zero_std": 0.0, "grad_norm": 3.792621189945623, "kl": 0.002574920654296875, "learning_rate": 3.5799999999999995e-07, "loss": 0.0226, "num_tokens": 7826149.0, "reward": 0.0, "reward_std": 0.4066470265388489, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21186225242447967, "rewards/wordcountpos_reward/raw_geo/std": 0.22223985717674158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13871099718746435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1163.0, "completions/max_terminated_length": 1163.0, "completions/mean_length": 962.625, "completions/mean_terminated_length": 962.625, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.036207241448289657, "frac_reward_zero_std": 0.0, "grad_norm": 3.6343550815169596, "kl": 0.0024318695068359375, "learning_rate": 3.6e-07, "loss": -0.0037, "num_tokens": 7857279.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4665229618549347, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04970994981291875, "rewards/wordcountpos_reward/raw_geo/std": 0.07474689532909116, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818892, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 1016.625, "completions/mean_terminated_length": 1016.625, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.03640728145629126, "frac_reward_zero_std": 0.0, "grad_norm": 3.0631302802851623, "kl": 0.001911163330078125, "learning_rate": 3.62e-07, "loss": -0.0151, "num_tokens": 7893177.0, "reward": 0.0, "reward_std": 0.753804087638855, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06338939390482173, "rewards/wordcountpos_reward/raw_geo/std": 0.09367060952409818, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1167.0, "completions/max_terminated_length": 1167.0, "completions/mean_length": 948.75, "completions/mean_terminated_length": 948.75, "completions/min_length": 721.0, "completions/min_terminated_length": 721.0, "epoch": 0.03660732146429286, "frac_reward_zero_std": 0.0, "grad_norm": 3.292156999717403, "kl": 0.0016918182373046875, "learning_rate": 3.64e-07, "loss": -0.0134, "num_tokens": 7923349.0, "reward": 0.0, "reward_std": 1.0503833293914795, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08098727139230245, "rewards/wordcountpos_reward/raw_geo/std": 0.044604403994638525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 963.5, "completions/mean_terminated_length": 963.5, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.03680736147229446, "frac_reward_zero_std": 0.0, "grad_norm": 3.95966920283632, "kl": 0.00257110595703125, "learning_rate": 3.6599999999999997e-07, "loss": -0.0042, "num_tokens": 7956021.0, "reward": -4.470348358154297e-08, "reward_std": 0.9801254272460938, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.029633090662191715, "rewards/wordcountpos_reward/raw_geo/std": 0.10525538767250749, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1127.0, "completions/max_terminated_length": 1127.0, "completions/mean_length": 896.125, "completions/mean_terminated_length": 896.125, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.03700740148029606, "frac_reward_zero_std": 0.0, "grad_norm": 3.5878131532814628, "kl": 0.0019073486328125, "learning_rate": 3.6799999999999996e-07, "loss": 0.056, "num_tokens": 8002015.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5900082588195801, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10677080929386158, "rewards/wordcountpos_reward/raw_geo/std": 0.1142294824476264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195013, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 794.1875, "completions/mean_terminated_length": 794.1875, "completions/min_length": 629.0, "completions/min_terminated_length": 629.0, "epoch": 0.03720744148829766, "frac_reward_zero_std": 0.0, "grad_norm": 3.117803911303747, "kl": 0.0014848709106445312, "learning_rate": 3.7e-07, "loss": -0.0534, "num_tokens": 8026906.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6441549062728882, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15024412523999706, "rewards/wordcountpos_reward/raw_geo/std": 0.10655149461960717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027818, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1225.0, "completions/max_terminated_length": 1225.0, "completions/mean_length": 978.6875, "completions/mean_terminated_length": 978.6875, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.03740748149629926, "frac_reward_zero_std": 0.0, "grad_norm": 3.6281579413008287, "kl": 0.002536773681640625, "learning_rate": 3.72e-07, "loss": -0.0011, "num_tokens": 8063797.0, "reward": 0.0, "reward_std": 0.20660829544067383, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.012286760964341002, "rewards/wordcountpos_reward/raw_geo/std": 0.12717403274496453, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1080.0, "completions/max_terminated_length": 1080.0, "completions/mean_length": 919.25, "completions/mean_terminated_length": 919.25, "completions/min_length": 577.0, "completions/min_terminated_length": 577.0, "epoch": 0.03760752150430086, "frac_reward_zero_std": 0.0, "grad_norm": 3.1910180791571126, "kl": 0.0019450187683105469, "learning_rate": 3.74e-07, "loss": -0.0311, "num_tokens": 8113321.0, "reward": 0.0, "reward_std": 0.9163142442703247, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06298861282831102, "rewards/wordcountpos_reward/raw_geo/std": 0.05211119526252271, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1122.6875, "completions/mean_terminated_length": 1122.6875, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.03780756151230246, "frac_reward_zero_std": 0.0, "grad_norm": 3.5635114672816797, "kl": 0.00274658203125, "learning_rate": 3.76e-07, "loss": 0.0284, "num_tokens": 8154004.0, "reward": 0.0, "reward_std": 0.9957304000854492, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14741367005867437, "rewards/wordcountpos_reward/raw_geo/std": 0.1260090567269409, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 897.875, "completions/mean_terminated_length": 897.875, "completions/min_length": 769.0, "completions/min_terminated_length": 769.0, "epoch": 0.03800760152030406, "frac_reward_zero_std": 0.0, "grad_norm": 3.6803461718858737, "kl": 0.0024261474609375, "learning_rate": 3.7799999999999997e-07, "loss": 0.0043, "num_tokens": 8176850.0, "reward": 5.960464477539063e-08, "reward_std": 0.735680341720581, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0179335721239762, "rewards/wordcountpos_reward/raw_geo/std": 0.03944499043521186, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 1020.75, "completions/mean_terminated_length": 1020.75, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.038207641528305664, "frac_reward_zero_std": 0.0, "grad_norm": 3.4050753674672176, "kl": 0.0019073486328125, "learning_rate": 3.7999999999999996e-07, "loss": -0.0176, "num_tokens": 8218310.0, "reward": 2.9802322387695312e-08, "reward_std": 0.830951988697052, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03690125483256228, "rewards/wordcountpos_reward/raw_geo/std": 0.14765704633450594, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1152.0, "completions/max_terminated_length": 1152.0, "completions/mean_length": 943.625, "completions/mean_terminated_length": 943.625, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.03840768153630726, "frac_reward_zero_std": 0.0, "grad_norm": 3.050901350685848, "kl": 0.001934051513671875, "learning_rate": 3.82e-07, "loss": -0.0076, "num_tokens": 8259848.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0370866060256958, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02772329477845906, "rewards/wordcountpos_reward/raw_geo/std": 0.13147103932548143, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.725, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1116.625, "completions/mean_terminated_length": 1116.625, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.03860772154430886, "frac_reward_zero_std": 0.0, "grad_norm": 2.705519281577542, "kl": 0.0014190673828125, "learning_rate": 3.84e-07, "loss": 0.0136, "num_tokens": 8310986.0, "reward": 0.0, "reward_std": 1.0658533573150635, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11494210881142039, "rewards/wordcountpos_reward/raw_geo/std": 0.09234127052439448, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999157, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1223.4375, "completions/mean_terminated_length": 1223.4375, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.038807761552310464, "frac_reward_zero_std": 0.0, "grad_norm": 3.279743374009339, "kl": 0.002254486083984375, "learning_rate": 3.86e-07, "loss": -0.0402, "num_tokens": 8364265.0, "reward": -2.9802322387695312e-08, "reward_std": 0.698399543762207, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006329398525197531, "rewards/wordcountpos_reward/raw_geo/std": 0.18347327662619942, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1129.4375, "completions/mean_terminated_length": 1104.7333984375, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.03900780156031206, "frac_reward_zero_std": 0.0, "grad_norm": 3.0135102651744936, "kl": 0.001689910888671875, "learning_rate": 3.88e-07, "loss": 0.0392, "num_tokens": 8406576.0, "reward": 5.960464477539063e-08, "reward_std": 0.8149147033691406, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13263655748177008, "rewards/wordcountpos_reward/raw_geo/std": 0.0760463669434398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14981470036162822, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1027.6875, "completions/mean_terminated_length": 1027.6875, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.03920784156831366, "frac_reward_zero_std": 0.0, "grad_norm": 3.1361277714137272, "kl": 0.002071380615234375, "learning_rate": 3.8999999999999997e-07, "loss": -0.0091, "num_tokens": 8442395.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8963784575462341, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0759340655854741, "rewards/wordcountpos_reward/raw_geo/std": 0.08140668260798975, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1151.1875, "completions/mean_terminated_length": 1127.933349609375, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.039407881576315265, "frac_reward_zero_std": 0.0, "grad_norm": 3.727651298566592, "kl": 0.00257110595703125, "learning_rate": 3.92e-07, "loss": -0.0052, "num_tokens": 8480838.0, "reward": 0.0, "reward_std": 0.9072641134262085, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06784537601579145, "rewards/wordcountpos_reward/raw_geo/std": 0.13219449132676328, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14240006242195885, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1213.3125, "completions/mean_terminated_length": 1194.2000732421875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.03960792158431686, "frac_reward_zero_std": 0.0, "grad_norm": 2.2825428905305376, "kl": 0.0008411407470703125, "learning_rate": 3.94e-07, "loss": -0.0021, "num_tokens": 8524347.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9728500843048096, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012441524437029899, "rewards/wordcountpos_reward/raw_geo/std": 0.09089370630767515, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1351.9375, "completions/mean_terminated_length": 1317.769287109375, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.03980796159231846, "frac_reward_zero_std": 0.0, "grad_norm": 2.973005495849857, "kl": 0.0021762847900390625, "learning_rate": 3.96e-07, "loss": -0.0021, "num_tokens": 8580450.0, "reward": 0.0, "reward_std": 0.5499659776687622, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04566220374326516, "rewards/wordcountpos_reward/raw_geo/std": 0.06935933060320619, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14700718047466632, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 920.625, "completions/mean_terminated_length": 920.625, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.040008001600320066, "frac_reward_zero_std": 0.0, "grad_norm": 3.727080114938833, "kl": 0.00251007080078125, "learning_rate": 3.98e-07, "loss": -0.022, "num_tokens": 8620556.0, "reward": -7.450580596923828e-09, "reward_std": 1.0630748271942139, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07240710981674245, "rewards/wordcountpos_reward/raw_geo/std": 0.15219866094924211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1074.3125, "completions/mean_terminated_length": 1074.3125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.040208041608321664, "frac_reward_zero_std": 0.0, "grad_norm": 3.2654280009746417, "kl": 0.002960205078125, "learning_rate": 4e-07, "loss": -0.1168, "num_tokens": 8669105.0, "reward": 0.0, "reward_std": 0.6544532775878906, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08119934346316145, "rewards/wordcountpos_reward/raw_geo/std": 0.06678370897685837, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14782371884055634, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1182.25, "completions/mean_terminated_length": 1161.0667724609375, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.04040808161632326, "frac_reward_zero_std": 0.0, "grad_norm": 3.340283569297992, "kl": 0.00260162353515625, "learning_rate": 4.02e-07, "loss": -0.0475, "num_tokens": 8713269.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8794863224029541, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16211102509859834, "rewards/wordcountpos_reward/raw_geo/std": 0.08923403825898198, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13601470508735444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1290.375, "completions/mean_terminated_length": 1164.5999755859375, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.04060812162432487, "frac_reward_zero_std": 0.0, "grad_norm": 3.1223416405331483, "kl": 0.002468109130859375, "learning_rate": 4.04e-07, "loss": -0.0256, "num_tokens": 8762155.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6992528438568115, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026972375505145148, "rewards/wordcountpos_reward/raw_geo/std": 0.1517567111786578, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1264.6875, "completions/mean_terminated_length": 1157.727294921875, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.040808161632326465, "frac_reward_zero_std": 0.0, "grad_norm": 2.502828108217289, "kl": 0.001190185546875, "learning_rate": 4.06e-07, "loss": -0.0334, "num_tokens": 8812862.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7799094319343567, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0060468740045577386, "rewards/wordcountpos_reward/raw_geo/std": 0.1342806864506098, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1331.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1025.0, "completions/mean_terminated_length": 1025.0, "completions/min_length": 680.0, "completions/min_terminated_length": 680.0, "epoch": 0.04100820164032806, "frac_reward_zero_std": 0.0, "grad_norm": 3.8722147766190846, "kl": 0.002590179443359375, "learning_rate": 4.0799999999999995e-07, "loss": 0.0362, "num_tokens": 8852110.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0247706174850464, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07381078917153294, "rewards/wordcountpos_reward/raw_geo/std": 0.0953837459139895, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194866, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1314.25, "completions/mean_terminated_length": 1301.86669921875, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.04120824164832967, "frac_reward_zero_std": 0.0, "grad_norm": 2.5362502325345195, "kl": 0.0015392303466796875, "learning_rate": 4.0999999999999994e-07, "loss": -0.0439, "num_tokens": 8898666.0, "reward": 0.0, "reward_std": 0.9100049734115601, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12093206963526529, "rewards/wordcountpos_reward/raw_geo/std": 0.08009363976804447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1272.8125, "completions/mean_terminated_length": 1197.0833740234375, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.041408281656331265, "frac_reward_zero_std": 0.0, "grad_norm": 2.5468386215078263, "kl": 0.0012402534484863281, "learning_rate": 4.12e-07, "loss": -0.0095, "num_tokens": 8949423.0, "reward": 0.0, "reward_std": 0.9750853776931763, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.003554812335247671, "rewards/wordcountpos_reward/raw_geo/std": 0.19559240150971294, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1176.5625, "completions/mean_terminated_length": 1176.5625, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.04160832166433286, "frac_reward_zero_std": 0.0, "grad_norm": 3.3752780921817847, "kl": 0.002216339111328125, "learning_rate": 4.14e-07, "loss": -0.0072, "num_tokens": 8988568.0, "reward": 0.0, "reward_std": 0.871620774269104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.114779931210139, "rewards/wordcountpos_reward/raw_geo/std": 0.09476311625242048, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1208.375, "completions/mean_terminated_length": 1075.8182373046875, "completions/min_length": 615.0, "completions/min_terminated_length": 615.0, "epoch": 0.04180836167233447, "frac_reward_zero_std": 0.0, "grad_norm": 2.845883479966067, "kl": 0.0020427703857421875, "learning_rate": 4.1599999999999997e-07, "loss": -0.03, "num_tokens": 9036454.0, "reward": 0.0, "reward_std": 0.560102105140686, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04502701724073032, "rewards/wordcountpos_reward/raw_geo/std": 0.13194859264475114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 933.0, "completions/max_terminated_length": 933.0, "completions/mean_length": 844.6875, "completions/mean_terminated_length": 844.6875, "completions/min_length": 627.0, "completions/min_terminated_length": 627.0, "epoch": 0.042008401680336066, "frac_reward_zero_std": 0.0, "grad_norm": 1.2202481999915225, "kl": 0.0003336668014526367, "learning_rate": 4.1799999999999996e-07, "loss": -0.0047, "num_tokens": 9074257.0, "reward": 0.0, "reward_std": 0.8350205421447754, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.016202605595714716, "rewards/wordcountpos_reward/raw_geo/std": 0.0665817288609625, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.05692750425533111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1150.5, "completions/mean_terminated_length": 1127.2000732421875, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.04220844168833767, "frac_reward_zero_std": 0.0, "grad_norm": 2.8463190345853056, "kl": 0.0017261505126953125, "learning_rate": 4.1999999999999995e-07, "loss": 0.0259, "num_tokens": 9125953.0, "reward": 0.0, "reward_std": 0.9862473607063293, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03530538009471619, "rewards/wordcountpos_reward/raw_geo/std": 0.05835435454978616, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1136.25, "completions/mean_terminated_length": 1112.0, "completions/min_length": 674.0, "completions/min_terminated_length": 674.0, "epoch": 0.04240848169633927, "frac_reward_zero_std": 0.0, "grad_norm": 2.8117754547957317, "kl": 0.001636505126953125, "learning_rate": 4.2199999999999994e-07, "loss": -0.0816, "num_tokens": 9175245.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9156209230422974, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02290077514995597, "rewards/wordcountpos_reward/raw_geo/std": 0.33724278352130804, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 1021.625, "completions/mean_terminated_length": 1021.625, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.04260852170434087, "frac_reward_zero_std": 0.0, "grad_norm": 3.170167923664273, "kl": 0.00215911865234375, "learning_rate": 4.24e-07, "loss": -0.0064, "num_tokens": 9215983.0, "reward": 0.0, "reward_std": 0.6473881602287292, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1045517076384495, "rewards/wordcountpos_reward/raw_geo/std": 0.15889560604122613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16953094331342802, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1024.25, "completions/mean_terminated_length": 1024.25, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.04280856171234247, "frac_reward_zero_std": 0.0, "grad_norm": 2.938371293387922, "kl": 0.001605987548828125, "learning_rate": 4.26e-07, "loss": -0.0286, "num_tokens": 9257243.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5818344354629517, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06563991627814246, "rewards/wordcountpos_reward/raw_geo/std": 0.11607894553999797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.14168300559373406, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1096.5, "completions/mean_terminated_length": 1038.857177734375, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.04300860172034407, "frac_reward_zero_std": 0.0, "grad_norm": 3.5631244307040775, "kl": 0.002407073974609375, "learning_rate": 4.2799999999999997e-07, "loss": -0.1163, "num_tokens": 9301571.0, "reward": 1.4901161193847656e-08, "reward_std": 0.949698805809021, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10345276668112834, "rewards/wordcountpos_reward/raw_geo/std": 0.12313624374139397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 989.0, "completions/max_terminated_length": 989.0, "completions/mean_length": 799.8125, "completions/mean_terminated_length": 799.8125, "completions/min_length": 491.0, "completions/min_terminated_length": 491.0, "epoch": 0.04320864172834567, "frac_reward_zero_std": 0.0, "grad_norm": 3.769714070681134, "kl": 0.002651214599609375, "learning_rate": 4.2999999999999996e-07, "loss": -0.0269, "num_tokens": 9336920.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9266948699951172, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0498792099029989, "rewards/wordcountpos_reward/raw_geo/std": 0.06668889560042873, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.6666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1173.3125, "completions/mean_terminated_length": 1151.533447265625, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.04340868173634727, "frac_reward_zero_std": 0.0, "grad_norm": 2.642290304005401, "kl": 0.000865936279296875, "learning_rate": 4.3199999999999995e-07, "loss": 0.0067, "num_tokens": 9370333.0, "reward": 1.862645149230957e-09, "reward_std": 0.9393295049667358, "rewards/wordcountpos_reward/mean": 1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.15558568939274872, "rewards/wordcountpos_reward/raw_geo/std": 0.29255120503185744, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12464765155042849, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 874.5, "completions/mean_terminated_length": 874.5, "completions/min_length": 711.0, "completions/min_terminated_length": 711.0, "epoch": 0.04360872174434887, "frac_reward_zero_std": 0.0, "grad_norm": 2.221006878283574, "kl": 0.0013418197631835938, "learning_rate": 4.34e-07, "loss": 0.0133, "num_tokens": 9406805.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9754551649093628, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07210750638126699, "rewards/wordcountpos_reward/raw_geo/std": 0.023868796326206483, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11925695879998881, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1242.4375, "completions/mean_terminated_length": 1225.2667236328125, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.04380876175235047, "frac_reward_zero_std": 0.0, "grad_norm": 1.962652254850243, "kl": 0.000919342041015625, "learning_rate": 4.36e-07, "loss": -0.0226, "num_tokens": 9466236.0, "reward": 0.0, "reward_std": 0.8539537191390991, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010749596747872818, "rewards/wordcountpos_reward/raw_geo/std": 0.11901292224974022, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1333.0, "completions/max_terminated_length": 1333.0, "completions/mean_length": 1082.375, "completions/mean_terminated_length": 1082.375, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.04400880176035207, "frac_reward_zero_std": 0.0, "grad_norm": 3.2792461401361157, "kl": 0.00229644775390625, "learning_rate": 4.38e-07, "loss": 0.0009, "num_tokens": 9512418.0, "reward": -2.9802322387695312e-08, "reward_std": 0.770158052444458, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.33078046995944205, "rewards/wordcountpos_reward/raw_geo/std": 0.23102998210724113, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1089.0, "completions/max_terminated_length": 1089.0, "completions/mean_length": 930.375, "completions/mean_terminated_length": 930.375, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.04420884176835367, "frac_reward_zero_std": 0.0, "grad_norm": 3.6569481673546904, "kl": 0.00208282470703125, "learning_rate": 4.3999999999999997e-07, "loss": 0.0179, "num_tokens": 9560944.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4487355053424835, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05540984573235712, "rewards/wordcountpos_reward/raw_geo/std": 0.06647964140307619, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1031.0, "completions/max_terminated_length": 1031.0, "completions/mean_length": 869.5625, "completions/mean_terminated_length": 869.5625, "completions/min_length": 614.0, "completions/min_terminated_length": 614.0, "epoch": 0.04440888177635527, "frac_reward_zero_std": 0.0, "grad_norm": 4.361008163837978, "kl": 0.003299713134765625, "learning_rate": 4.4199999999999996e-07, "loss": -0.0178, "num_tokens": 9594473.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0673775672912598, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14855897842667443, "rewards/wordcountpos_reward/raw_geo/std": 0.19004606810878735, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.625, "rewards/wordcountpos_reward/raw_rule/std": 0.22949219304078008, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1136.5, "completions/mean_terminated_length": 1136.5, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.044608921784356874, "frac_reward_zero_std": 0.0, "grad_norm": 3.5246563719784905, "kl": 0.00261688232421875, "learning_rate": 4.44e-07, "loss": -0.0045, "num_tokens": 9644649.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8183039426803589, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2731578987364741, "rewards/wordcountpos_reward/raw_geo/std": 0.34677137497788774, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414602, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1234.0, "completions/mean_length": 918.8125, "completions/mean_terminated_length": 880.0667114257812, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.04480896179235847, "frac_reward_zero_std": 0.0, "grad_norm": 2.5983836740091224, "kl": 0.00209808349609375, "learning_rate": 4.46e-07, "loss": 0.054, "num_tokens": 9691310.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8674355745315552, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08417420603100946, "rewards/wordcountpos_reward/raw_geo/std": 0.31090114155806386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.6458333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.21322045624883876, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1293.5, "completions/mean_terminated_length": 1293.5, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.04500900180036007, "frac_reward_zero_std": 0.0, "grad_norm": 2.5619520266167988, "kl": 0.001697540283203125, "learning_rate": 4.48e-07, "loss": -0.0331, "num_tokens": 9734798.0, "reward": 0.0, "reward_std": 0.6951881647109985, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13601809935061862, "rewards/wordcountpos_reward/raw_geo/std": 0.2861767604950857, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1065.9375, "completions/mean_terminated_length": 1065.9375, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.045209041808361675, "frac_reward_zero_std": 0.0, "grad_norm": 3.0972722370830534, "kl": 0.002086639404296875, "learning_rate": 4.5e-07, "loss": -0.0294, "num_tokens": 9776397.0, "reward": 0.0, "reward_std": 0.37369710206985474, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07690115437370365, "rewards/wordcountpos_reward/raw_geo/std": 0.17665170899241697, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1519624710005487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1194.6875, "completions/mean_terminated_length": 1174.3333740234375, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.04540908181636327, "frac_reward_zero_std": 0.0, "grad_norm": 2.6818268509468783, "kl": 0.001720428466796875, "learning_rate": 4.5199999999999997e-07, "loss": -0.0321, "num_tokens": 9825224.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7789763808250427, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14412644640263858, "rewards/wordcountpos_reward/raw_geo/std": 0.19802116435283593, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12464765155042849, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1362.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1071.5, "completions/mean_terminated_length": 1071.5, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.04560912182436487, "frac_reward_zero_std": 0.0, "grad_norm": 3.4297091146136736, "kl": 0.0022220611572265625, "learning_rate": 4.54e-07, "loss": -0.0474, "num_tokens": 9865968.0, "reward": 0.0, "reward_std": 0.7685360908508301, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09570066720474164, "rewards/wordcountpos_reward/raw_geo/std": 0.09805851963071717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7, "rewards/wordcountpos_reward/raw_rule/std": 0.1192569587999888, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1061.375, "completions/mean_terminated_length": 1032.1334228515625, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.045809161832366475, "frac_reward_zero_std": 0.0, "grad_norm": 3.3764199769472514, "kl": 0.0021114349365234375, "learning_rate": 4.56e-07, "loss": 0.0192, "num_tokens": 9908422.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0675699710845947, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03232322866633074, "rewards/wordcountpos_reward/raw_geo/std": 0.052356298330070904, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1045.5625, "completions/mean_terminated_length": 1045.5625, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.04600920184036807, "frac_reward_zero_std": 0.0, "grad_norm": 3.6358476431972786, "kl": 0.002521514892578125, "learning_rate": 4.58e-07, "loss": 0.0086, "num_tokens": 9949439.0, "reward": 0.0, "reward_std": 0.7558891177177429, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11293083128848397, "rewards/wordcountpos_reward/raw_geo/std": 0.26460484652038063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13817594795257457, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 979.375, "completions/mean_terminated_length": 979.375, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.04620924184836967, "frac_reward_zero_std": 0.0, "grad_norm": 2.9432527026487554, "kl": 0.00164031982421875, "learning_rate": 4.6e-07, "loss": -0.0151, "num_tokens": 9987453.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7914766073226929, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2776319812833346, "rewards/wordcountpos_reward/raw_geo/std": 0.2091878370836691, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1124.0, "completions/max_terminated_length": 1124.0, "completions/mean_length": 935.5, "completions/mean_terminated_length": 935.5, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.046409281856371276, "frac_reward_zero_std": 0.0, "grad_norm": 3.501816611091303, "kl": 0.0021820068359375, "learning_rate": 4.62e-07, "loss": 0.0098, "num_tokens": 10026053.0, "reward": 7.450580596923828e-09, "reward_std": 1.061112642288208, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0009388975839314043, "rewards/wordcountpos_reward/raw_geo/std": 0.25690021770429855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639732, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1156.75, "completions/mean_terminated_length": 1133.86669921875, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.046609321864372874, "frac_reward_zero_std": 0.0, "grad_norm": 3.197590654024074, "kl": 0.002071380615234375, "learning_rate": 4.64e-07, "loss": 0.0031, "num_tokens": 10074593.0, "reward": 0.0, "reward_std": 0.9700093865394592, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05754481591005366, "rewards/wordcountpos_reward/raw_geo/std": 0.131811020957611, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1141.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 955.4375, "completions/mean_terminated_length": 955.4375, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.04680936187237447, "frac_reward_zero_std": 0.0, "grad_norm": 3.417303615929551, "kl": 0.0023059844970703125, "learning_rate": 4.66e-07, "loss": 0.0006, "num_tokens": 10101864.0, "reward": 7.450580596923828e-09, "reward_std": 1.053598403930664, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.0794532384301168, "rewards/wordcountpos_reward/raw_geo/std": 0.0891815204166084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222518, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1011.0, "completions/max_terminated_length": 1011.0, "completions/mean_length": 917.25, "completions/mean_terminated_length": 917.25, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.04700940188037608, "frac_reward_zero_std": 0.0, "grad_norm": 2.5029887243756614, "kl": 0.0009889602661132812, "learning_rate": 4.68e-07, "loss": -0.0065, "num_tokens": 10144820.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4073413610458374, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13657683348336547, "rewards/wordcountpos_reward/raw_geo/std": 0.11450675754461333, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1287403358472941, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1328.5, "completions/mean_terminated_length": 1225.5999755859375, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.047209441888377675, "frac_reward_zero_std": 0.0, "grad_norm": 3.126877234182285, "kl": 0.002269744873046875, "learning_rate": 4.6999999999999995e-07, "loss": -0.0216, "num_tokens": 10191940.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7219676971435547, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1363078346597265, "rewards/wordcountpos_reward/raw_geo/std": 0.3528697059083918, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 1000.25, "completions/mean_terminated_length": 1000.25, "completions/min_length": 621.0, "completions/min_terminated_length": 621.0, "epoch": 0.04740948189637927, "frac_reward_zero_std": 0.0, "grad_norm": 3.6674717025459933, "kl": 0.002590179443359375, "learning_rate": 4.7199999999999994e-07, "loss": -0.0247, "num_tokens": 10228712.0, "reward": 0.0, "reward_std": 0.7236359119415283, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21741912367146687, "rewards/wordcountpos_reward/raw_geo/std": 0.05995470328566925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1365582225578092, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1173.0, "completions/max_terminated_length": 1173.0, "completions/mean_length": 957.125, "completions/mean_terminated_length": 957.125, "completions/min_length": 669.0, "completions/min_terminated_length": 669.0, "epoch": 0.04760952190438088, "frac_reward_zero_std": 0.0, "grad_norm": 3.5578471776175284, "kl": 0.002086639404296875, "learning_rate": 4.7399999999999993e-07, "loss": 0.0516, "num_tokens": 10277978.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0124543905258179, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09208478336285762, "rewards/wordcountpos_reward/raw_geo/std": 0.08017650590902214, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787749, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1146.5, "completions/mean_terminated_length": 1122.933349609375, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.047809561912382476, "frac_reward_zero_std": 0.0, "grad_norm": 2.8573036743864857, "kl": 0.0019283294677734375, "learning_rate": 4.76e-07, "loss": -0.0165, "num_tokens": 10330346.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8940658569335938, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2515121205060752, "rewards/wordcountpos_reward/raw_geo/std": 0.1708536332043649, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820636, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1108.625, "completions/mean_terminated_length": 1082.533447265625, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.048009601920384073, "frac_reward_zero_std": 0.0, "grad_norm": 3.2899874989285482, "kl": 0.0022525787353515625, "learning_rate": 4.779999999999999e-07, "loss": -0.0253, "num_tokens": 10377028.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7602491974830627, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14163505990739514, "rewards/wordcountpos_reward/raw_geo/std": 0.11600805262689726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13214750456578045, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 1003.625, "completions/mean_terminated_length": 1003.625, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.04820964192838568, "frac_reward_zero_std": 0.0, "grad_norm": 2.781234983223836, "kl": 0.0013475418090820312, "learning_rate": 4.8e-07, "loss": 0.0168, "num_tokens": 10419478.0, "reward": 0.0, "reward_std": 0.6488606333732605, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16687250177532015, "rewards/wordcountpos_reward/raw_geo/std": 0.19042451899573043, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 954.75, "completions/mean_terminated_length": 954.75, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.048409681936387276, "frac_reward_zero_std": 0.0, "grad_norm": 3.769629689363409, "kl": 0.002178192138671875, "learning_rate": 4.82e-07, "loss": -0.0437, "num_tokens": 10461786.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8474140167236328, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16588000018033014, "rewards/wordcountpos_reward/raw_geo/std": 0.3314221813736554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 820.0, "completions/mean_terminated_length": 820.0, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.04860972194438888, "frac_reward_zero_std": 0.0, "grad_norm": 3.1142124243308373, "kl": 0.0012178421020507812, "learning_rate": 4.839999999999999e-07, "loss": 0.0013, "num_tokens": 10493034.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7323605418205261, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05774076177500897, "rewards/wordcountpos_reward/raw_geo/std": 0.13265386704689375, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476839, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1122.0, "completions/max_terminated_length": 1122.0, "completions/mean_length": 1028.375, "completions/mean_terminated_length": 1028.375, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.04880976195239048, "frac_reward_zero_std": 0.0, "grad_norm": 1.7371766949160146, "kl": 0.00049591064453125, "learning_rate": 4.86e-07, "loss": 0.0089, "num_tokens": 10536736.0, "reward": 0.0, "reward_std": 1.009200096130371, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.010877500520318774, "rewards/wordcountpos_reward/raw_geo/std": 0.05590197255036356, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1230.375, "completions/mean_terminated_length": 1140.5, "completions/min_length": 664.0, "completions/min_terminated_length": 664.0, "epoch": 0.04900980196039208, "frac_reward_zero_std": 0.0, "grad_norm": 2.9596381596370973, "kl": 0.0020198822021484375, "learning_rate": 4.879999999999999e-07, "loss": -0.0026, "num_tokens": 10584798.0, "reward": 0.0, "reward_std": 0.908205509185791, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07778093230453198, "rewards/wordcountpos_reward/raw_geo/std": 0.0701129405116386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.1520233900132184, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1163.3125, "completions/mean_terminated_length": 1140.86669921875, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.04920984196839368, "frac_reward_zero_std": 0.0, "grad_norm": 2.339328394757649, "kl": 0.0009527206420898438, "learning_rate": 4.9e-07, "loss": -0.0075, "num_tokens": 10624059.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0123875141143799, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.031379049980629146, "rewards/wordcountpos_reward/raw_geo/std": 0.11357900524634221, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639735, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 1064.8125, "completions/mean_terminated_length": 1035.800048828125, "completions/min_length": 487.0, "completions/min_terminated_length": 487.0, "epoch": 0.04940988197639528, "frac_reward_zero_std": 0.0, "grad_norm": 2.562922333983465, "kl": 0.0017910003662109375, "learning_rate": 4.92e-07, "loss": -0.0635, "num_tokens": 10658624.0, "reward": 0.0, "reward_std": 0.6656528115272522, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0886025617532563, "rewards/wordcountpos_reward/raw_geo/std": 0.10955627817292048, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1065.9375, "completions/mean_terminated_length": 1065.9375, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.04960992198439688, "frac_reward_zero_std": 0.0, "grad_norm": 3.4725363438468877, "kl": 0.0019683837890625, "learning_rate": 4.94e-07, "loss": 0.0172, "num_tokens": 10706671.0, "reward": 0.0, "reward_std": 0.7576044797897339, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.012261243461389371, "rewards/wordcountpos_reward/raw_geo/std": 0.09734405789336316, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1201.25, "completions/mean_terminated_length": 1158.571533203125, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.04980996199239848, "frac_reward_zero_std": 0.0, "grad_norm": 2.9682384067334424, "kl": 0.0017547607421875, "learning_rate": 4.96e-07, "loss": -0.0114, "num_tokens": 10745723.0, "reward": 1.30385160446167e-08, "reward_std": 0.997305154800415, "rewards/wordcountpos_reward/mean": 1.30385160446167e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.41643560115602263, "rewards/wordcountpos_reward/raw_geo/std": 0.053809601866069846, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1322.375, "completions/mean_terminated_length": 1263.166748046875, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.05001000200040008, "frac_reward_zero_std": 0.0, "grad_norm": 2.498170494033239, "kl": 0.0016222000122070312, "learning_rate": 4.979999999999999e-07, "loss": -0.0182, "num_tokens": 10798585.0, "reward": 0.0, "reward_std": 0.7340776324272156, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.049307774680816895, "rewards/wordcountpos_reward/raw_geo/std": 0.06508276053191246, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.048495895206211566, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1026.8125, "completions/mean_terminated_length": 995.2667236328125, "completions/min_length": 633.0, "completions/min_terminated_length": 633.0, "epoch": 0.05021004200840168, "frac_reward_zero_std": 0.0, "grad_norm": 3.082007906039928, "kl": 0.0015439987182617188, "learning_rate": 5e-07, "loss": -0.0155, "num_tokens": 10833454.0, "reward": 0.0, "reward_std": 0.5358977317810059, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.009103681466487323, "rewards/wordcountpos_reward/raw_geo/std": 0.15289171081351013, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 956.875, "completions/mean_terminated_length": 956.875, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.05041008201640328, "frac_reward_zero_std": 0.0, "grad_norm": 2.580286635495049, "kl": 0.0013780593872070312, "learning_rate": 5.02e-07, "loss": 0.0091, "num_tokens": 10878548.0, "reward": 0.0, "reward_std": 0.9386855363845825, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14375805416449897, "rewards/wordcountpos_reward/raw_geo/std": 0.0712602892053773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1224.0, "completions/mean_terminated_length": 1205.60009765625, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.05061012202440488, "frac_reward_zero_std": 0.0, "grad_norm": 1.9276289839627134, "kl": 0.0009131431579589844, "learning_rate": 5.04e-07, "loss": 0.0239, "num_tokens": 10924924.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0571565628051758, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1659473574763683, "rewards/wordcountpos_reward/raw_geo/std": 0.18580689257688182, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1219.0625, "completions/mean_terminated_length": 1178.9285888671875, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.05081016203240648, "frac_reward_zero_std": 0.0, "grad_norm": 2.922856681083572, "kl": 0.0017871856689453125, "learning_rate": 5.06e-07, "loss": 0.004, "num_tokens": 10970213.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7152448892593384, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07250068745291727, "rewards/wordcountpos_reward/raw_geo/std": 0.17110556123388107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460886, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1252.0625, "completions/mean_terminated_length": 1139.3636474609375, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.051010202040408084, "frac_reward_zero_std": 0.0, "grad_norm": 2.9540837028324844, "kl": 0.001926422119140625, "learning_rate": 5.079999999999999e-07, "loss": 0.0086, "num_tokens": 11020030.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9681185483932495, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00919053917813432, "rewards/wordcountpos_reward/raw_geo/std": 0.04797649848379129, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1313.125, "completions/mean_terminated_length": 1250.8333740234375, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.05121024204840968, "frac_reward_zero_std": 0.0, "grad_norm": 3.0333475841007367, "kl": 0.00246429443359375, "learning_rate": 5.1e-07, "loss": -0.032, "num_tokens": 11077224.0, "reward": 0.0, "reward_std": 0.6521957516670227, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01088874880562371, "rewards/wordcountpos_reward/raw_geo/std": 0.19445062691212478, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1261.8125, "completions/mean_terminated_length": 1206.84619140625, "completions/min_length": 1047.0, "completions/min_terminated_length": 1047.0, "epoch": 0.05141028205641128, "frac_reward_zero_std": 0.0, "grad_norm": 3.1676015906229225, "kl": 0.00247955322265625, "learning_rate": 5.12e-07, "loss": -0.0159, "num_tokens": 11131957.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0640606880187988, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.017548991635790653, "rewards/wordcountpos_reward/raw_geo/std": 0.11961394967139573, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 964.5, "completions/mean_terminated_length": 888.0000610351562, "completions/min_length": 568.0, "completions/min_terminated_length": 568.0, "epoch": 0.051610322064412885, "frac_reward_zero_std": 0.0, "grad_norm": 2.949807245318231, "kl": 0.00179290771484375, "learning_rate": 5.14e-07, "loss": -0.0029, "num_tokens": 11171661.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7342426776885986, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03695731594706882, "rewards/wordcountpos_reward/raw_geo/std": 0.16550425037823152, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1097.75, "completions/mean_terminated_length": 1070.933349609375, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.05181036207241448, "frac_reward_zero_std": 0.0, "grad_norm": 3.2597310261177537, "kl": 0.0020351409912109375, "learning_rate": 5.16e-07, "loss": 0.0015, "num_tokens": 11213953.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9189243316650391, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0337706933685308, "rewards/wordcountpos_reward/raw_geo/std": 0.05898891676744919, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1153.0, "completions/mean_length": 1029.1875, "completions/mean_terminated_length": 997.800048828125, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.05201040208041608, "frac_reward_zero_std": 0.0, "grad_norm": 3.500006324284666, "kl": 0.002288818359375, "learning_rate": 5.18e-07, "loss": -0.0266, "num_tokens": 11259436.0, "reward": 1.4901161193847656e-08, "reward_std": 1.016369104385376, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09808513341577466, "rewards/wordcountpos_reward/raw_geo/std": 0.0511581479541605, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 921.25, "completions/mean_terminated_length": 921.25, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.052210442088417686, "frac_reward_zero_std": 0.0, "grad_norm": 3.3527336673424566, "kl": 0.0023040771484375, "learning_rate": 5.2e-07, "loss": -0.0288, "num_tokens": 11308728.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9681817889213562, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23922034870195008, "rewards/wordcountpos_reward/raw_geo/std": 0.4189218739249153, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.0925962962222252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 1043.375, "completions/mean_terminated_length": 1043.375, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.052410482096419284, "frac_reward_zero_std": 0.0, "grad_norm": 3.789090010532537, "kl": 0.002643585205078125, "learning_rate": 5.22e-07, "loss": -0.022, "num_tokens": 11355198.0, "reward": 0.0, "reward_std": 0.6457654237747192, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05679817341659235, "rewards/wordcountpos_reward/raw_geo/std": 0.05172502433684633, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941139, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1155.0, "completions/max_terminated_length": 1155.0, "completions/mean_length": 876.5625, "completions/mean_terminated_length": 876.5625, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.05261052210442088, "frac_reward_zero_std": 0.0, "grad_norm": 4.348528439154449, "kl": 0.00286865234375, "learning_rate": 5.24e-07, "loss": 0.0522, "num_tokens": 11393047.0, "reward": 0.0, "reward_std": 0.6818137168884277, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.009604823762648262, "rewards/wordcountpos_reward/raw_geo/std": 0.0930052670633696, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590965, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1274.8125, "completions/mean_terminated_length": 1259.800048828125, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.052810562112422486, "frac_reward_zero_std": 0.0, "grad_norm": 2.6796853844591833, "kl": 0.0017719268798828125, "learning_rate": 5.26e-07, "loss": -0.0074, "num_tokens": 11441044.0, "reward": -7.450580596923828e-09, "reward_std": 0.9428989887237549, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.12655737946537823, "rewards/wordcountpos_reward/raw_geo/std": 0.143888059477041, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1066.375, "completions/mean_terminated_length": 1066.375, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.053010602120424084, "frac_reward_zero_std": 0.0, "grad_norm": 3.2411456624804784, "kl": 0.002246856689453125, "learning_rate": 5.28e-07, "loss": -0.0297, "num_tokens": 11486642.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6332656145095825, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1801774123872189, "rewards/wordcountpos_reward/raw_geo/std": 0.12452352156407756, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1189.125, "completions/mean_terminated_length": 1189.125, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.05321064212842568, "frac_reward_zero_std": 0.0, "grad_norm": 3.36758558767966, "kl": 0.00261688232421875, "learning_rate": 5.3e-07, "loss": -0.0447, "num_tokens": 11535940.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9821509122848511, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03995858950419156, "rewards/wordcountpos_reward/raw_geo/std": 0.0922515695501585, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1413558682244267, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1353.6875, "completions/mean_terminated_length": 1265.9000244140625, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.05341068213642729, "frac_reward_zero_std": 0.0, "grad_norm": 2.69135869048864, "kl": 0.00201416015625, "learning_rate": 5.32e-07, "loss": -0.0154, "num_tokens": 11594359.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0331776142120361, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04879070979342799, "rewards/wordcountpos_reward/raw_geo/std": 0.060374116114327976, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725114, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 956.1875, "completions/mean_terminated_length": 956.1875, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.053610722144428885, "frac_reward_zero_std": 0.0, "grad_norm": 3.565425749584275, "kl": 0.00189971923828125, "learning_rate": 5.34e-07, "loss": -0.0061, "num_tokens": 11630762.0, "reward": -9.313225746154785e-09, "reward_std": 0.9770306944847107, "rewards/wordcountpos_reward/mean": -9.313225746154785e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.009862841180942467, "rewards/wordcountpos_reward/raw_geo/std": 0.10174287325257836, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.072520750542581, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1040.0, "completions/max_terminated_length": 1040.0, "completions/mean_length": 811.25, "completions/mean_terminated_length": 811.25, "completions/min_length": 549.0, "completions/min_terminated_length": 549.0, "epoch": 0.05381076215243048, "frac_reward_zero_std": 0.0, "grad_norm": 3.5042565406928774, "kl": 0.001926422119140625, "learning_rate": 5.36e-07, "loss": 0.0255, "num_tokens": 11655262.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8735646605491638, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01877535841962073, "rewards/wordcountpos_reward/raw_geo/std": 0.08614063247566738, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1140.4375, "completions/mean_terminated_length": 1116.4666748046875, "completions/min_length": 595.0, "completions/min_terminated_length": 595.0, "epoch": 0.05401080216043209, "frac_reward_zero_std": 0.0, "grad_norm": 3.4586928748034422, "kl": 0.002590179443359375, "learning_rate": 5.38e-07, "loss": -0.1136, "num_tokens": 11708469.0, "reward": 0.0, "reward_std": 1.0621317625045776, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12225851999280535, "rewards/wordcountpos_reward/raw_geo/std": 0.07800859857537935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 957.8125, "completions/mean_terminated_length": 957.8125, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.054210842168433686, "frac_reward_zero_std": 0.0, "grad_norm": 2.738653175944796, "kl": 0.0013217926025390625, "learning_rate": 5.4e-07, "loss": -0.0103, "num_tokens": 11741914.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8998227715492249, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.025634164800156116, "rewards/wordcountpos_reward/raw_geo/std": 0.06844079930998706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891874, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1090.375, "completions/mean_terminated_length": 1063.0667724609375, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.054410882176435284, "frac_reward_zero_std": 0.0, "grad_norm": 3.6735008716436566, "kl": 0.00262451171875, "learning_rate": 5.420000000000001e-07, "loss": 0.0283, "num_tokens": 11784144.0, "reward": 0.0, "reward_std": 1.0356578826904297, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.017954077836690237, "rewards/wordcountpos_reward/raw_geo/std": 0.13486503139722647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1176.0, "completions/max_terminated_length": 1176.0, "completions/mean_length": 1016.9375, "completions/mean_terminated_length": 1016.9375, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.05461092218443689, "frac_reward_zero_std": 0.0, "grad_norm": 4.078931473141189, "kl": 0.003021240234375, "learning_rate": 5.44e-07, "loss": -0.0128, "num_tokens": 11823663.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4060839116573334, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1317357823676234, "rewards/wordcountpos_reward/raw_geo/std": 0.13562679679250417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11674600476945511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1208.0, "completions/max_terminated_length": 1208.0, "completions/mean_length": 979.4375, "completions/mean_terminated_length": 979.4375, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.05481096219243849, "frac_reward_zero_std": 0.0, "grad_norm": 2.7091414346538256, "kl": 0.00147247314453125, "learning_rate": 5.46e-07, "loss": -0.0071, "num_tokens": 11872710.0, "reward": 0.0, "reward_std": 0.904438853263855, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2436913189742776, "rewards/wordcountpos_reward/raw_geo/std": 0.17202725688187753, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.06206328908341753, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1149.1875, "completions/mean_terminated_length": 1125.800048828125, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.05501100220044009, "frac_reward_zero_std": 0.0, "grad_norm": 3.3131370072683772, "kl": 0.00223541259765625, "learning_rate": 5.48e-07, "loss": 0.0336, "num_tokens": 11914897.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8671855926513672, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00042998981095781173, "rewards/wordcountpos_reward/raw_geo/std": 0.16643644225434123, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459203, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1165.0, "completions/max_terminated_length": 1165.0, "completions/mean_length": 958.5, "completions/mean_terminated_length": 958.5, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.05521104220844169, "frac_reward_zero_std": 0.0, "grad_norm": 3.33414768511492, "kl": 0.0020503997802734375, "learning_rate": 5.5e-07, "loss": -0.0201, "num_tokens": 11950849.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0278414487838745, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1112860828530976, "rewards/wordcountpos_reward/raw_geo/std": 0.13677217120618296, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1219.375, "completions/mean_terminated_length": 1125.8333740234375, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.05541108221644329, "frac_reward_zero_std": 0.0, "grad_norm": 3.194524162908702, "kl": 0.002140045166015625, "learning_rate": 5.520000000000001e-07, "loss": 0.0264, "num_tokens": 12002615.0, "reward": 0.0, "reward_std": 0.733389139175415, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04092493020791305, "rewards/wordcountpos_reward/raw_geo/std": 0.25423442561951226, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 967.625, "completions/mean_terminated_length": 967.625, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.05561112222444489, "frac_reward_zero_std": 0.0, "grad_norm": 3.7100092503320248, "kl": 0.00249481201171875, "learning_rate": 5.54e-07, "loss": 0.0401, "num_tokens": 12045769.0, "reward": 2.9802322387695312e-08, "reward_std": 0.62291020154953, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02627827228191887, "rewards/wordcountpos_reward/raw_geo/std": 0.06679695132261204, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1075.9375, "completions/mean_terminated_length": 1075.9375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.05581116223244649, "frac_reward_zero_std": 0.0, "grad_norm": 3.1001417425462474, "kl": 0.0018062591552734375, "learning_rate": 5.560000000000001e-07, "loss": -0.0128, "num_tokens": 12082104.0, "reward": 0.0, "reward_std": 0.6159245371818542, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0631568710393361, "rewards/wordcountpos_reward/raw_geo/std": 0.07380088512307473, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1019.4375, "completions/mean_terminated_length": 987.4000244140625, "completions/min_length": 570.0, "completions/min_terminated_length": 570.0, "epoch": 0.05601120224044809, "frac_reward_zero_std": 0.0, "grad_norm": 3.090708545740446, "kl": 0.00196075439453125, "learning_rate": 5.58e-07, "loss": -0.0039, "num_tokens": 12115935.0, "reward": 0.0, "reward_std": 0.612042248249054, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09689570793110974, "rewards/wordcountpos_reward/raw_geo/std": 0.10598693754127003, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 931.875, "completions/mean_terminated_length": 931.875, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.05621124224844969, "frac_reward_zero_std": 0.0, "grad_norm": 3.363495670737204, "kl": 0.0018367767333984375, "learning_rate": 5.6e-07, "loss": 0.0292, "num_tokens": 12152613.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8437509536743164, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09482659079586239, "rewards/wordcountpos_reward/raw_geo/std": 0.13114599876635355, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14548768561863465, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1302.4375, "completions/mean_terminated_length": 1274.21435546875, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.05641128225645129, "frac_reward_zero_std": 0.0, "grad_norm": 2.8831623863197833, "kl": 0.002315521240234375, "learning_rate": 5.620000000000001e-07, "loss": -0.0359, "num_tokens": 12205908.0, "reward": 0.0, "reward_std": 1.027881145477295, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09447188890396592, "rewards/wordcountpos_reward/raw_geo/std": 0.15564244232480584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14807405554629052, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1172.5625, "completions/mean_terminated_length": 1097.0, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.05661132226445289, "frac_reward_zero_std": 0.0, "grad_norm": 3.1855622622614663, "kl": 0.0025482177734375, "learning_rate": 5.639999999999999e-07, "loss": -0.0679, "num_tokens": 12249109.0, "reward": 0.0, "reward_std": 0.8259780406951904, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10498265392435348, "rewards/wordcountpos_reward/raw_geo/std": 0.1863217193440607, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1024.375, "completions/mean_terminated_length": 1024.375, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.056811362272454494, "frac_reward_zero_std": 0.0, "grad_norm": 2.812524829164937, "kl": 0.0015053749084472656, "learning_rate": 5.66e-07, "loss": -0.0081, "num_tokens": 12292467.0, "reward": -7.450580596923828e-09, "reward_std": 1.0298956632614136, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.14568746959091, "rewards/wordcountpos_reward/raw_geo/std": 0.0523996791957048, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1109.125, "completions/mean_terminated_length": 1083.0667724609375, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.05701140228045609, "frac_reward_zero_std": 0.0, "grad_norm": 4.0071112440453645, "kl": 0.003082275390625, "learning_rate": 5.679999999999999e-07, "loss": -0.0033, "num_tokens": 12341581.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0290765762329102, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.058311324661011944, "rewards/wordcountpos_reward/raw_geo/std": 0.0708403946332691, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787749, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1457.25, "completions/mean_terminated_length": 1386.0, "completions/min_length": 1210.0, "completions/min_terminated_length": 1210.0, "epoch": 0.05721144228845769, "frac_reward_zero_std": 0.0, "grad_norm": 2.2016616153199116, "kl": 0.001537322998046875, "learning_rate": 5.699999999999999e-07, "loss": 0.0183, "num_tokens": 12395065.0, "reward": 0.0, "reward_std": 0.6284165382385254, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06762073027489378, "rewards/wordcountpos_reward/raw_geo/std": 0.1083948509433297, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.19302657656203526, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1148.1875, "completions/mean_terminated_length": 1148.1875, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.057411482296459294, "frac_reward_zero_std": 0.0, "grad_norm": 3.2005057734895987, "kl": 0.002338409423828125, "learning_rate": 5.719999999999999e-07, "loss": -0.0073, "num_tokens": 12447532.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9242644906044006, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2421782276404428, "rewards/wordcountpos_reward/raw_geo/std": 0.36510859624452024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 999.1875, "completions/mean_terminated_length": 999.1875, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.05761152230446089, "frac_reward_zero_std": 0.0, "grad_norm": 3.001088548172376, "kl": 0.0018463134765625, "learning_rate": 5.739999999999999e-07, "loss": -0.0142, "num_tokens": 12485719.0, "reward": 0.0, "reward_std": 1.0114140510559082, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.039533985923892255, "rewards/wordcountpos_reward/raw_geo/std": 0.06518250967437134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1215.0, "completions/mean_terminated_length": 1196.0001220703125, "completions/min_length": 1051.0, "completions/min_terminated_length": 1051.0, "epoch": 0.05781156231246249, "frac_reward_zero_std": 0.0, "grad_norm": 2.4212077809990387, "kl": 0.0014495849609375, "learning_rate": 5.76e-07, "loss": 0.0129, "num_tokens": 12533639.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9643208384513855, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010432199316583488, "rewards/wordcountpos_reward/raw_geo/std": 0.2838686951503673, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14950535726806533, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1108.1875, "completions/mean_terminated_length": 1108.1875, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.058011602320464095, "frac_reward_zero_std": 0.0, "grad_norm": 3.5708859830894153, "kl": 0.00247955322265625, "learning_rate": 5.779999999999999e-07, "loss": 0.0293, "num_tokens": 12574658.0, "reward": 0.0, "reward_std": 0.8100683689117432, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20566863818592698, "rewards/wordcountpos_reward/raw_geo/std": 0.11447752648502987, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1171.0, "completions/mean_length": 994.1875, "completions/mean_terminated_length": 960.4667358398438, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.05821164232846569, "frac_reward_zero_std": 0.0, "grad_norm": 3.123399244318877, "kl": 0.0014438629150390625, "learning_rate": 5.8e-07, "loss": -0.0178, "num_tokens": 12608757.0, "reward": 5.960464477539063e-08, "reward_std": 0.7506067752838135, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.032369080666875855, "rewards/wordcountpos_reward/raw_geo/std": 0.1782802644436956, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1169.0, "completions/max_terminated_length": 1169.0, "completions/mean_length": 897.25, "completions/mean_terminated_length": 897.25, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.05841168233646729, "frac_reward_zero_std": 0.0, "grad_norm": 4.121676792863123, "kl": 0.002887725830078125, "learning_rate": 5.819999999999999e-07, "loss": 0.0138, "num_tokens": 12648025.0, "reward": -1.4901161193847656e-08, "reward_std": 0.918125331401825, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08942325223787094, "rewards/wordcountpos_reward/raw_geo/std": 0.1267665447355549, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1209.6875, "completions/mean_terminated_length": 1112.916748046875, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.058611722344468896, "frac_reward_zero_std": 0.0, "grad_norm": 3.3944720467872944, "kl": 0.002704620361328125, "learning_rate": 5.839999999999999e-07, "loss": -0.0203, "num_tokens": 12694924.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9822285175323486, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07775060471764175, "rewards/wordcountpos_reward/raw_geo/std": 0.09679048885033613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.1246476515504285, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1084.375, "completions/mean_terminated_length": 1084.375, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.058811762352470494, "frac_reward_zero_std": 0.0, "grad_norm": 3.6419277625909783, "kl": 0.0022735595703125, "learning_rate": 5.86e-07, "loss": -0.0243, "num_tokens": 12739842.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0385416746139526, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11827043702999762, "rewards/wordcountpos_reward/raw_geo/std": 0.12328475187178402, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11287488977066927, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 932.0625, "completions/mean_terminated_length": 932.0625, "completions/min_length": 706.0, "completions/min_terminated_length": 706.0, "epoch": 0.05901180236047209, "frac_reward_zero_std": 0.0, "grad_norm": 3.7067288939495304, "kl": 0.0022945404052734375, "learning_rate": 5.879999999999999e-07, "loss": -0.0416, "num_tokens": 12786907.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8422307968139648, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0790643612082375, "rewards/wordcountpos_reward/raw_geo/std": 0.08532462775981063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1110.0, "completions/max_terminated_length": 1110.0, "completions/mean_length": 832.625, "completions/mean_terminated_length": 832.625, "completions/min_length": 665.0, "completions/min_terminated_length": 665.0, "epoch": 0.0592118423684737, "frac_reward_zero_std": 0.0, "grad_norm": 2.9964245781817285, "kl": 0.001468658447265625, "learning_rate": 5.9e-07, "loss": -0.002, "num_tokens": 12817349.0, "reward": -2.9802322387695312e-08, "reward_std": 0.572007417678833, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03612498848301205, "rewards/wordcountpos_reward/raw_geo/std": 0.08841184925567475, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1214.125, "completions/mean_terminated_length": 1195.0667724609375, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.059411882376475295, "frac_reward_zero_std": 0.0, "grad_norm": 3.0757043152235926, "kl": 0.00231170654296875, "learning_rate": 5.919999999999999e-07, "loss": 0.0112, "num_tokens": 12869775.0, "reward": 0.0, "reward_std": 1.0303146839141846, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06435223181714003, "rewards/wordcountpos_reward/raw_geo/std": 0.11032298804130312, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1188.1875, "completions/mean_terminated_length": 1167.4000244140625, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.05961192238447689, "frac_reward_zero_std": 0.0, "grad_norm": 3.3795733599482705, "kl": 0.002716064453125, "learning_rate": 5.939999999999999e-07, "loss": 0.0292, "num_tokens": 12913442.0, "reward": 0.0, "reward_std": 0.8517546653747559, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19127794010239385, "rewards/wordcountpos_reward/raw_geo/std": 0.2392408971160161, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1271.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1043.875, "completions/mean_terminated_length": 1043.875, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.0598119623924785, "frac_reward_zero_std": 0.0, "grad_norm": 3.1264651625284445, "kl": 0.0018444061279296875, "learning_rate": 5.96e-07, "loss": -0.0202, "num_tokens": 12956928.0, "reward": -2.9802322387695312e-08, "reward_std": 0.780985951423645, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06315532973992809, "rewards/wordcountpos_reward/raw_geo/std": 0.08005901049505422, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1313.75, "completions/mean_terminated_length": 1287.1429443359375, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.060012002400480095, "frac_reward_zero_std": 0.0, "grad_norm": 2.5101882857081543, "kl": 0.0015964508056640625, "learning_rate": 5.979999999999999e-07, "loss": -0.0166, "num_tokens": 13010348.0, "reward": 0.0, "reward_std": 0.9337519407272339, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12569856781501984, "rewards/wordcountpos_reward/raw_geo/std": 0.18251893088022128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504183, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1163.0, "completions/max_terminated_length": 1163.0, "completions/mean_length": 892.25, "completions/mean_terminated_length": 892.25, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.06021204240848169, "frac_reward_zero_std": 0.0, "grad_norm": 3.8346163247245273, "kl": 0.0027618408203125, "learning_rate": 6e-07, "loss": -0.0223, "num_tokens": 13060680.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9565110802650452, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03965529497944607, "rewards/wordcountpos_reward/raw_geo/std": 0.25520347327466575, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1126.875, "completions/mean_terminated_length": 1126.875, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.0604120824164833, "frac_reward_zero_std": 0.0, "grad_norm": 2.6806723016782477, "kl": 0.0012826919555664062, "learning_rate": 6.019999999999999e-07, "loss": -0.0367, "num_tokens": 13104510.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5853835940361023, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08667054525418348, "rewards/wordcountpos_reward/raw_geo/std": 0.0888390598040084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000003, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 975.25, "completions/mean_terminated_length": 975.25, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.060612122424484896, "frac_reward_zero_std": 0.0, "grad_norm": 2.8437785743985784, "kl": 0.001529693603515625, "learning_rate": 6.04e-07, "loss": 0.0088, "num_tokens": 13138474.0, "reward": 0.0, "reward_std": 0.36437976360321045, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06299645475646642, "rewards/wordcountpos_reward/raw_geo/std": 0.06302040927071395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.12758439472669758, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1020.4375, "completions/mean_terminated_length": 1020.4375, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.060812162432486494, "frac_reward_zero_std": 0.0, "grad_norm": 2.4869444550853714, "kl": 0.0012989044189453125, "learning_rate": 6.06e-07, "loss": 0.0257, "num_tokens": 13175265.0, "reward": 0.0, "reward_std": 0.7430821657180786, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0005488080137889088, "rewards/wordcountpos_reward/raw_geo/std": 0.07019403386774242, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1178.25, "completions/mean_terminated_length": 1156.800048828125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.0610122024404881, "frac_reward_zero_std": 0.0, "grad_norm": 2.6258813079891294, "kl": 0.0019683837890625, "learning_rate": 6.079999999999999e-07, "loss": -0.1066, "num_tokens": 13222661.0, "reward": 1.4901161193847656e-08, "reward_std": 1.029001235961914, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05724283744699999, "rewards/wordcountpos_reward/raw_geo/std": 0.08013325872368318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16638865702079933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1237.5, "completions/mean_terminated_length": 1220.0001220703125, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.0612122424484897, "frac_reward_zero_std": 0.0, "grad_norm": 2.7390794375804473, "kl": 0.0018596649169921875, "learning_rate": 6.1e-07, "loss": -0.0221, "num_tokens": 13266333.0, "reward": 0.0, "reward_std": 0.8190957307815552, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.005076199972937165, "rewards/wordcountpos_reward/raw_geo/std": 0.16572168512538063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1056.625, "completions/mean_terminated_length": 1056.625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.0614122824564913, "frac_reward_zero_std": 0.0, "grad_norm": 3.063044264731715, "kl": 0.0019969940185546875, "learning_rate": 6.119999999999999e-07, "loss": 0.0309, "num_tokens": 13307983.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9251764416694641, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04105501676273889, "rewards/wordcountpos_reward/raw_geo/std": 0.13178643108276034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1010.0, "completions/mean_terminated_length": 1010.0, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.0616123224644929, "frac_reward_zero_std": 0.0, "grad_norm": 3.9874463752278735, "kl": 0.002811431884765625, "learning_rate": 6.14e-07, "loss": 0.0082, "num_tokens": 13342799.0, "reward": 2.9802322387695312e-08, "reward_std": 0.887786865234375, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.025254785697083126, "rewards/wordcountpos_reward/raw_geo/std": 0.036471777473698085, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1009.1875, "completions/mean_terminated_length": 1009.1875, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.0618123624724945, "frac_reward_zero_std": 0.0, "grad_norm": 2.8693445513047005, "kl": 0.0018749237060546875, "learning_rate": 6.16e-07, "loss": 0.0035, "num_tokens": 13391522.0, "reward": 0.0, "reward_std": 0.8305159211158752, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11150393212404293, "rewards/wordcountpos_reward/raw_geo/std": 0.06834906851606187, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1221.3125, "completions/mean_terminated_length": 1202.7333984375, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.0620124024804961, "frac_reward_zero_std": 0.0, "grad_norm": 2.303923800082003, "kl": 0.001476287841796875, "learning_rate": 6.18e-07, "loss": -0.0188, "num_tokens": 13444119.0, "reward": 7.450580596923828e-09, "reward_std": 0.9877969026565552, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10394082649393052, "rewards/wordcountpos_reward/raw_geo/std": 0.08866684990643865, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.15682025568335423, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 1079.875, "completions/mean_terminated_length": 1079.875, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.0622124424884977, "frac_reward_zero_std": 0.0, "grad_norm": 3.3611722832709807, "kl": 0.0022983551025390625, "learning_rate": 6.2e-07, "loss": 0.0016, "num_tokens": 13476485.0, "reward": 0.0, "reward_std": 1.0434529781341553, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14645681153285914, "rewards/wordcountpos_reward/raw_geo/std": 0.05453470913693115, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1185.4375, "completions/mean_terminated_length": 1112.84619140625, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.0624124824964993, "frac_reward_zero_std": 0.0, "grad_norm": 3.3905020408211195, "kl": 0.002315521240234375, "learning_rate": 6.219999999999999e-07, "loss": 0.0245, "num_tokens": 13527916.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0176523923873901, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11392709757098199, "rewards/wordcountpos_reward/raw_geo/std": 0.17001836166721562, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1016.0, "completions/mean_terminated_length": 1016.0, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.0626125225045009, "frac_reward_zero_std": 0.0, "grad_norm": 2.764912834215363, "kl": 0.0015354156494140625, "learning_rate": 6.24e-07, "loss": -0.0016, "num_tokens": 13576060.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9084645509719849, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07193952034221997, "rewards/wordcountpos_reward/raw_geo/std": 0.05538298695013117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 1041.625, "completions/mean_terminated_length": 1011.0667114257812, "completions/min_length": 644.0, "completions/min_terminated_length": 644.0, "epoch": 0.0628125625125025, "frac_reward_zero_std": 0.0, "grad_norm": 3.208809453598079, "kl": 0.00234222412109375, "learning_rate": 6.26e-07, "loss": 0.0445, "num_tokens": 13631350.0, "reward": 0.0, "reward_std": 0.9452207684516907, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1717315844490716, "rewards/wordcountpos_reward/raw_geo/std": 0.248546937009435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.19085577257690145, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1147.4375, "completions/mean_terminated_length": 1147.4375, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.0630126025205041, "frac_reward_zero_std": 0.0, "grad_norm": 3.182743019031256, "kl": 0.0020084381103515625, "learning_rate": 6.28e-07, "loss": -0.0422, "num_tokens": 13668245.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8133895397186279, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09711728312407862, "rewards/wordcountpos_reward/raw_geo/std": 0.06648308475473783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1149.0625, "completions/mean_terminated_length": 1149.0625, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.0632126425285057, "frac_reward_zero_std": 0.0, "grad_norm": 3.371215938064058, "kl": 0.0027313232421875, "learning_rate": 6.3e-07, "loss": -0.0185, "num_tokens": 13722822.0, "reward": 0.0, "reward_std": 0.8048403263092041, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05137935671907337, "rewards/wordcountpos_reward/raw_geo/std": 0.14063821767116533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1108.4375, "completions/mean_terminated_length": 1108.4375, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.0634126825365073, "frac_reward_zero_std": 0.0, "grad_norm": 3.2660824124838803, "kl": 0.002685546875, "learning_rate": 6.319999999999999e-07, "loss": -0.0136, "num_tokens": 13764357.0, "reward": 2.9802322387695312e-08, "reward_std": 0.808626115322113, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.059376022695172254, "rewards/wordcountpos_reward/raw_geo/std": 0.1811806562000528, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1039.25, "completions/mean_terminated_length": 1008.5333862304688, "completions/min_length": 650.0, "completions/min_terminated_length": 650.0, "epoch": 0.0636127225445089, "frac_reward_zero_std": 0.0, "grad_norm": 3.169479100437204, "kl": 0.0019283294677734375, "learning_rate": 6.34e-07, "loss": 0.0095, "num_tokens": 13805513.0, "reward": 0.0, "reward_std": 1.0416817665100098, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09504080041498822, "rewards/wordcountpos_reward/raw_geo/std": 0.07216381431360824, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1157.0, "completions/max_terminated_length": 1157.0, "completions/mean_length": 952.8125, "completions/mean_terminated_length": 952.8125, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.0638127625525105, "frac_reward_zero_std": 0.0, "grad_norm": 3.3554670934085853, "kl": 0.0021762847900390625, "learning_rate": 6.36e-07, "loss": -0.0313, "num_tokens": 13847878.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9850356578826904, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08900518555554442, "rewards/wordcountpos_reward/raw_geo/std": 0.049632250821620906, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1112.4375, "completions/mean_terminated_length": 1112.4375, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.0640128025605121, "frac_reward_zero_std": 0.0, "grad_norm": 3.150213494732458, "kl": 0.001941680908203125, "learning_rate": 6.38e-07, "loss": -0.0559, "num_tokens": 13884597.0, "reward": 0.0, "reward_std": 0.6624261140823364, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027774763845650578, "rewards/wordcountpos_reward/raw_geo/std": 0.22926687131285664, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1137.0, "completions/mean_terminated_length": 1085.1429443359375, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.0642128425685137, "frac_reward_zero_std": 0.0, "grad_norm": 2.7949010862492543, "kl": 0.0018482208251953125, "learning_rate": 6.4e-07, "loss": 0.0362, "num_tokens": 13930397.0, "reward": 0.0, "reward_std": 0.9132857918739319, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0517072507827321, "rewards/wordcountpos_reward/raw_geo/std": 0.10795766044018207, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459203, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1057.3125, "completions/mean_terminated_length": 1057.3125, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.0644128825765153, "frac_reward_zero_std": 0.0, "grad_norm": 3.8088891084344714, "kl": 0.002841949462890625, "learning_rate": 6.42e-07, "loss": -0.0485, "num_tokens": 13979578.0, "reward": 0.0, "reward_std": 0.7923277616500854, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.26652284433631424, "rewards/wordcountpos_reward/raw_geo/std": 0.29583048435687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1087.125, "completions/mean_terminated_length": 1087.125, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.06461292258451691, "frac_reward_zero_std": 0.0, "grad_norm": 3.628668319526969, "kl": 0.002689361572265625, "learning_rate": 6.44e-07, "loss": -0.0469, "num_tokens": 14022820.0, "reward": 3.3527612686157227e-08, "reward_std": 1.0566508769989014, "rewards/wordcountpos_reward/mean": 3.3527612686157227e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.053744711475414986, "rewards/wordcountpos_reward/raw_geo/std": 0.062447366133733515, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982529, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1141.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 840.875, "completions/mean_terminated_length": 840.875, "completions/min_length": 591.0, "completions/min_terminated_length": 591.0, "epoch": 0.06481296259251851, "frac_reward_zero_std": 0.0, "grad_norm": 4.162774266686213, "kl": 0.0028839111328125, "learning_rate": 6.46e-07, "loss": -0.0581, "num_tokens": 14058098.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9659443497657776, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.017583180819989268, "rewards/wordcountpos_reward/raw_geo/std": 0.040396642014153515, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725114, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1226.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1043.3125, "completions/mean_terminated_length": 1043.3125, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.0650130026005201, "frac_reward_zero_std": 0.0, "grad_norm": 3.3173352056337864, "kl": 0.0025634765625, "learning_rate": 6.48e-07, "loss": 0.0054, "num_tokens": 14099135.0, "reward": 0.0, "reward_std": 0.8442152738571167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14853364130675664, "rewards/wordcountpos_reward/raw_geo/std": 0.17144873852492, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1072.5, "completions/mean_terminated_length": 1072.5, "completions/min_length": 673.0, "completions/min_terminated_length": 673.0, "epoch": 0.0652130426085217, "frac_reward_zero_std": 0.0, "grad_norm": 3.6595747162271466, "kl": 0.00283050537109375, "learning_rate": 6.5e-07, "loss": -0.0566, "num_tokens": 14149071.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7453584671020508, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012454429072859121, "rewards/wordcountpos_reward/raw_geo/std": 0.07412407372166926, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.16487930490266264, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1281.0625, "completions/mean_terminated_length": 1149.7000732421875, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.0654130826165233, "frac_reward_zero_std": 0.0, "grad_norm": 2.176983313820909, "kl": 0.00162506103515625, "learning_rate": 6.52e-07, "loss": 0.0172, "num_tokens": 14197480.0, "reward": 0.0, "reward_std": 0.687066912651062, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08468934088941331, "rewards/wordcountpos_reward/raw_geo/std": 0.20035030405853219, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1203.0, "completions/mean_terminated_length": 1203.0, "completions/min_length": 1112.0, "completions/min_terminated_length": 1112.0, "epoch": 0.0656131226245249, "frac_reward_zero_std": 0.0, "grad_norm": 2.2508664761846973, "kl": 0.0011320114135742188, "learning_rate": 6.54e-07, "loss": -0.0258, "num_tokens": 14242432.0, "reward": 0.0, "reward_std": 0.7108990550041199, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04138512484886412, "rewards/wordcountpos_reward/raw_geo/std": 0.038863689948215284, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 910.3125, "completions/mean_terminated_length": 910.3125, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.06581316263252651, "frac_reward_zero_std": 0.0, "grad_norm": 11.575300056029576, "kl": 0.010959625244140625, "learning_rate": 6.56e-07, "loss": 0.0436, "num_tokens": 14284277.0, "reward": 0.0, "reward_std": 0.4071442782878876, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15959212636785292, "rewards/wordcountpos_reward/raw_geo/std": 0.08815790274196343, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.16504769232176725, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1297.0625, "completions/mean_terminated_length": 1297.0625, "completions/min_length": 1124.0, "completions/min_terminated_length": 1124.0, "epoch": 0.06601320264052811, "frac_reward_zero_std": 0.0, "grad_norm": 2.4195217715731987, "kl": 0.001461029052734375, "learning_rate": 6.58e-07, "loss": -0.0004, "num_tokens": 14327758.0, "reward": 0.0, "reward_std": 0.8564111590385437, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04800210085630859, "rewards/wordcountpos_reward/raw_geo/std": 0.08192731407821649, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1206.0, "completions/max_terminated_length": 1206.0, "completions/mean_length": 1024.875, "completions/mean_terminated_length": 1024.875, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.06621324264852971, "frac_reward_zero_std": 0.0, "grad_norm": 4.2884484054422245, "kl": 0.002864837646484375, "learning_rate": 6.6e-07, "loss": 0.0125, "num_tokens": 14372228.0, "reward": 0.0, "reward_std": 0.8580461740493774, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14922614970099476, "rewards/wordcountpos_reward/raw_geo/std": 0.17456040203399587, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11729986896522632, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1118.375, "completions/mean_terminated_length": 1118.375, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.0664132826565313, "frac_reward_zero_std": 0.0, "grad_norm": 3.347544540598828, "kl": 0.002353668212890625, "learning_rate": 6.62e-07, "loss": -0.0511, "num_tokens": 14419170.0, "reward": 0.0, "reward_std": 0.6000388264656067, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.021246552942310905, "rewards/wordcountpos_reward/raw_geo/std": 0.17621630609971914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.16187558093703852, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 1119.5, "completions/mean_terminated_length": 1119.5, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.0666133226645329, "frac_reward_zero_std": 0.0, "grad_norm": 3.236978140615024, "kl": 0.0025634765625, "learning_rate": 6.64e-07, "loss": 0.0054, "num_tokens": 14461018.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5933891534805298, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05126863502407506, "rewards/wordcountpos_reward/raw_geo/std": 0.08129981030182694, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1212.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 844.4375, "completions/mean_terminated_length": 844.4375, "completions/min_length": 358.0, "completions/min_terminated_length": 358.0, "epoch": 0.0668133626725345, "frac_reward_zero_std": 0.0, "grad_norm": 3.796790200361543, "kl": 0.00252532958984375, "learning_rate": 6.66e-07, "loss": 0.0222, "num_tokens": 14504441.0, "reward": 0.0, "reward_std": 0.578281044960022, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03980048960926316, "rewards/wordcountpos_reward/raw_geo/std": 0.059171702716111954, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1112221667221529, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1083.125, "completions/mean_terminated_length": 1083.125, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.06701340268053611, "frac_reward_zero_std": 0.0, "grad_norm": 2.960862612234057, "kl": 0.001617431640625, "learning_rate": 6.68e-07, "loss": 0.0126, "num_tokens": 14539699.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9998736381530762, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03836378500552864, "rewards/wordcountpos_reward/raw_geo/std": 0.06505059535662829, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1019.5625, "completions/mean_terminated_length": 1019.5625, "completions/min_length": 718.0, "completions/min_terminated_length": 718.0, "epoch": 0.06721344268853771, "frac_reward_zero_std": 0.0, "grad_norm": 3.044760141416458, "kl": 0.001708984375, "learning_rate": 6.7e-07, "loss": -0.0471, "num_tokens": 14590132.0, "reward": 0.0, "reward_std": 0.7415168285369873, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.048215794580698204, "rewards/wordcountpos_reward/raw_geo/std": 0.21159535757855202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 949.5625, "completions/mean_terminated_length": 949.5625, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.06741348269653931, "frac_reward_zero_std": 0.0, "grad_norm": 2.9768790752339065, "kl": 0.0016498565673828125, "learning_rate": 6.72e-07, "loss": -0.0326, "num_tokens": 14641037.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9546054601669312, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06805036725184002, "rewards/wordcountpos_reward/raw_geo/std": 0.1477188160573905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043478, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1362.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1066.8125, "completions/mean_terminated_length": 1066.8125, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.06761352270454091, "frac_reward_zero_std": 0.0, "grad_norm": 3.181960438740294, "kl": 0.002201080322265625, "learning_rate": 6.74e-07, "loss": -0.0371, "num_tokens": 14681834.0, "reward": 0.0, "reward_std": 1.0623748302459717, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08070542429791751, "rewards/wordcountpos_reward/raw_geo/std": 0.20169913646235652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1432.625, "completions/mean_terminated_length": 1392.2000732421875, "completions/min_length": 1234.0, "completions/min_terminated_length": 1234.0, "epoch": 0.0678135627125425, "frac_reward_zero_std": 0.0, "grad_norm": 2.2765867329539096, "kl": 0.0016117095947265625, "learning_rate": 6.76e-07, "loss": -0.0028, "num_tokens": 14731316.0, "reward": 0.0, "reward_std": 0.46427273750305176, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11775969495363756, "rewards/wordcountpos_reward/raw_geo/std": 0.08700059071223658, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1184.75, "completions/mean_terminated_length": 1163.7333984375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.0680136027205441, "frac_reward_zero_std": 0.0, "grad_norm": 2.534593453322148, "kl": 0.00226593017578125, "learning_rate": 6.78e-07, "loss": -0.1523, "num_tokens": 14780184.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9950509071350098, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10665595219291132, "rewards/wordcountpos_reward/raw_geo/std": 0.07305267964045026, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.19148542155126763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1141.875, "completions/mean_terminated_length": 1141.875, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.06821364272854571, "frac_reward_zero_std": 0.0, "grad_norm": 2.881249157475616, "kl": 0.002140045166015625, "learning_rate": 6.800000000000001e-07, "loss": -0.0323, "num_tokens": 14822270.0, "reward": 0.0, "reward_std": 0.8476592302322388, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16595827895346643, "rewards/wordcountpos_reward/raw_geo/std": 0.2569588317902804, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1034.9375, "completions/mean_terminated_length": 1034.9375, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.06841368273654731, "frac_reward_zero_std": 0.0, "grad_norm": 2.709589589085801, "kl": 0.0016498565673828125, "learning_rate": 6.82e-07, "loss": 0.001, "num_tokens": 14872501.0, "reward": 2.9802322387695312e-08, "reward_std": 0.873496413230896, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12085816525472777, "rewards/wordcountpos_reward/raw_geo/std": 0.08748211994370123, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14375905768565217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1236.75, "completions/mean_terminated_length": 1236.75, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.06861372274454891, "frac_reward_zero_std": 0.0, "grad_norm": 3.188809501759933, "kl": 0.00220489501953125, "learning_rate": 6.84e-07, "loss": -0.0068, "num_tokens": 14915129.0, "reward": 0.0, "reward_std": 0.708284854888916, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2786993791153533, "rewards/wordcountpos_reward/raw_geo/std": 0.23962569152925248, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 968.625, "completions/mean_terminated_length": 968.625, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.06881376275255051, "frac_reward_zero_std": 0.0, "grad_norm": 2.7840982497592917, "kl": 0.001495361328125, "learning_rate": 6.86e-07, "loss": -0.0259, "num_tokens": 14957419.0, "reward": 0.0, "reward_std": 0.9442850351333618, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.055345984809441334, "rewards/wordcountpos_reward/raw_geo/std": 0.07450376610568207, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1317.125, "completions/mean_terminated_length": 1134.25, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.0690138027605521, "frac_reward_zero_std": 0.0, "grad_norm": 2.835135661127783, "kl": 0.002285003662109375, "learning_rate": 6.879999999999999e-07, "loss": 0.0125, "num_tokens": 15011917.0, "reward": 0.0, "reward_std": 0.6531475782394409, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09215414551257015, "rewards/wordcountpos_reward/raw_geo/std": 0.13822771617055088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1323.5625, "completions/mean_terminated_length": 1217.7000732421875, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.0692138427685537, "frac_reward_zero_std": 0.0, "grad_norm": 2.8209262766980157, "kl": 0.002353668212890625, "learning_rate": 6.9e-07, "loss": -0.0114, "num_tokens": 15056006.0, "reward": -2.9802322387695312e-08, "reward_std": 0.3590697646141052, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2157673199946833, "rewards/wordcountpos_reward/raw_geo/std": 0.2541211332646892, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1115.625, "completions/mean_terminated_length": 1090.0, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.06941388277655532, "frac_reward_zero_std": 0.0, "grad_norm": 2.6752427537410512, "kl": 0.0013446807861328125, "learning_rate": 6.919999999999999e-07, "loss": 0.0106, "num_tokens": 15100024.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0165367126464844, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06213331587041926, "rewards/wordcountpos_reward/raw_geo/std": 0.0663310457195454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1263.6875, "completions/mean_terminated_length": 1209.1539306640625, "completions/min_length": 1063.0, "completions/min_terminated_length": 1063.0, "epoch": 0.06961392278455691, "frac_reward_zero_std": 0.0, "grad_norm": 3.5208534028308445, "kl": 0.002899169921875, "learning_rate": 6.939999999999999e-07, "loss": 0.0, "num_tokens": 15149139.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8045864105224609, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.139632860388533, "rewards/wordcountpos_reward/raw_geo/std": 0.09975457244061715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7, "rewards/wordcountpos_reward/raw_rule/std": 0.19925788241297684, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1255.8125, "completions/mean_terminated_length": 1255.8125, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.06981396279255851, "frac_reward_zero_std": 0.0, "grad_norm": 2.879589260932738, "kl": 0.0022430419921875, "learning_rate": 6.959999999999999e-07, "loss": 0.0049, "num_tokens": 15198080.0, "reward": -1.4901161193847656e-08, "reward_std": 0.998940110206604, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016722163423016047, "rewards/wordcountpos_reward/raw_geo/std": 0.07768241344881252, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1194.625, "completions/mean_terminated_length": 1174.2667236328125, "completions/min_length": 1017.0, "completions/min_terminated_length": 1017.0, "epoch": 0.07001400280056011, "frac_reward_zero_std": 0.0, "grad_norm": 2.947983812382237, "kl": 0.0017719268798828125, "learning_rate": 6.979999999999999e-07, "loss": 0.0094, "num_tokens": 15240754.0, "reward": 0.0, "reward_std": 1.0460045337677002, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06416512868165652, "rewards/wordcountpos_reward/raw_geo/std": 0.11803918568625868, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1155.0, "completions/mean_length": 1203.875, "completions/mean_terminated_length": 907.75, "completions/min_length": 701.0, "completions/min_terminated_length": 701.0, "epoch": 0.07021404280856171, "frac_reward_zero_std": 0.0, "grad_norm": 2.9159886477539008, "kl": 0.00200653076171875, "learning_rate": 7e-07, "loss": -0.0338, "num_tokens": 15284592.0, "reward": 2.60770320892334e-08, "reward_std": 1.0591906309127808, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08274944216271243, "rewards/wordcountpos_reward/raw_geo/std": 0.13913631224809797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045819, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1189.0625, "completions/mean_terminated_length": 1189.0625, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.0704140828165633, "frac_reward_zero_std": 0.0, "grad_norm": 2.402210024944055, "kl": 0.0016193389892578125, "learning_rate": 7.019999999999999e-07, "loss": 0.0035, "num_tokens": 15328881.0, "reward": -7.450580596923828e-09, "reward_std": 1.0541963577270508, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.31289127576000936, "rewards/wordcountpos_reward/raw_geo/std": 0.12024119136861808, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078614, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1093.375, "completions/mean_terminated_length": 1093.375, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.07061412282456492, "frac_reward_zero_std": 0.0, "grad_norm": 3.638271287037237, "kl": 0.00286102294921875, "learning_rate": 7.04e-07, "loss": -0.0579, "num_tokens": 15374119.0, "reward": 0.0, "reward_std": 1.0525254011154175, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14673167580737695, "rewards/wordcountpos_reward/raw_geo/std": 0.12173009811252362, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12412657816683505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1250.4375, "completions/mean_terminated_length": 1214.7857666015625, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.07081416283256652, "frac_reward_zero_std": 0.0, "grad_norm": 2.62637743631162, "kl": 0.0017414093017578125, "learning_rate": 7.059999999999999e-07, "loss": -0.0088, "num_tokens": 15426054.0, "reward": 7.450580596923828e-09, "reward_std": 1.0290229320526123, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.05629722815652811, "rewards/wordcountpos_reward/raw_geo/std": 0.09274705617736778, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1044.0, "completions/max_terminated_length": 1044.0, "completions/mean_length": 919.625, "completions/mean_terminated_length": 919.625, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.07101420284056811, "frac_reward_zero_std": 0.0, "grad_norm": 2.6926984871409587, "kl": 0.0010986328125, "learning_rate": 7.079999999999999e-07, "loss": -0.0075, "num_tokens": 15456544.0, "reward": 0.0, "reward_std": 0.8328101634979248, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.003483548583850108, "rewards/wordcountpos_reward/raw_geo/std": 0.060410686429002144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1053.1875, "completions/mean_terminated_length": 989.357177734375, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.07121424284856971, "frac_reward_zero_std": 0.0, "grad_norm": 2.719211311166968, "kl": 0.001689910888671875, "learning_rate": 7.1e-07, "loss": 0.0219, "num_tokens": 15507139.0, "reward": 0.0, "reward_std": 0.5637771487236023, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15861353078116033, "rewards/wordcountpos_reward/raw_geo/std": 0.12625282240985225, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1277.625, "completions/mean_terminated_length": 1262.800048828125, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.07141428285657131, "frac_reward_zero_std": 0.0, "grad_norm": 2.435609043224974, "kl": 0.0015420913696289062, "learning_rate": 7.119999999999999e-07, "loss": -0.0008, "num_tokens": 15555077.0, "reward": 7.450580596923828e-09, "reward_std": 0.9934263229370117, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.08861176358222685, "rewards/wordcountpos_reward/raw_geo/std": 0.05935167698723634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1262.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 1033.0, "completions/mean_terminated_length": 1033.0, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.07161432286457292, "frac_reward_zero_std": 0.0, "grad_norm": 3.5375847418865582, "kl": 0.0025787353515625, "learning_rate": 7.14e-07, "loss": -0.0168, "num_tokens": 15605781.0, "reward": -1.4901161193847656e-08, "reward_std": 1.007598876953125, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06353798041978058, "rewards/wordcountpos_reward/raw_geo/std": 0.07467621574831491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1124.0, "completions/max_terminated_length": 1124.0, "completions/mean_length": 952.375, "completions/mean_terminated_length": 952.375, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.07181436287257452, "frac_reward_zero_std": 0.0, "grad_norm": 3.9649064099482034, "kl": 0.002960205078125, "learning_rate": 7.159999999999999e-07, "loss": -0.003, "num_tokens": 15647635.0, "reward": 1.862645149230957e-08, "reward_std": 1.0129902362823486, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.4265952735055052, "rewards/wordcountpos_reward/raw_geo/std": 0.25485152846738385, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1132.0625, "completions/mean_terminated_length": 1107.533447265625, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.07201440288057612, "frac_reward_zero_std": 0.0, "grad_norm": 3.2863929244211296, "kl": 0.0025634765625, "learning_rate": 7.179999999999999e-07, "loss": -0.0269, "num_tokens": 15696476.0, "reward": 0.0, "reward_std": 1.0001307725906372, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07299820639434745, "rewards/wordcountpos_reward/raw_geo/std": 0.1415088623491039, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 977.6875, "completions/mean_terminated_length": 977.6875, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.07221444288857772, "frac_reward_zero_std": 0.0, "grad_norm": 3.745734395705583, "kl": 0.00186920166015625, "learning_rate": 7.2e-07, "loss": -0.0665, "num_tokens": 15746055.0, "reward": 0.0, "reward_std": 0.8177444934844971, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.4038995998701616, "rewards/wordcountpos_reward/raw_geo/std": 0.13189988564737254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.18614013040757266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1182.0625, "completions/mean_terminated_length": 1136.6429443359375, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.07241448289657931, "frac_reward_zero_std": 0.0, "grad_norm": 2.723404620398981, "kl": 0.0018215179443359375, "learning_rate": 7.219999999999999e-07, "loss": 0.0398, "num_tokens": 15788336.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7788569331169128, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08275675925236076, "rewards/wordcountpos_reward/raw_geo/std": 0.17988156817836473, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1134.4375, "completions/mean_terminated_length": 1134.4375, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.07261452290458091, "frac_reward_zero_std": 0.0, "grad_norm": 3.3954514626295818, "kl": 0.00255584716796875, "learning_rate": 7.24e-07, "loss": -0.0147, "num_tokens": 15830903.0, "reward": 7.450580596923828e-09, "reward_std": 1.0275144577026367, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.14644673945227946, "rewards/wordcountpos_reward/raw_geo/std": 0.08761564023917168, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 956.9375, "completions/mean_terminated_length": 956.9375, "completions/min_length": 685.0, "completions/min_terminated_length": 685.0, "epoch": 0.07281456291258252, "frac_reward_zero_std": 0.0, "grad_norm": 3.7847189671405994, "kl": 0.002666473388671875, "learning_rate": 7.259999999999999e-07, "loss": -0.0189, "num_tokens": 15860358.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8187500238418579, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2553505085671958, "rewards/wordcountpos_reward/raw_geo/std": 0.08274951313215693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1169.375, "completions/mean_terminated_length": 1169.375, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.07301460292058412, "frac_reward_zero_std": 0.0, "grad_norm": 3.0581275082954615, "kl": 0.002239227294921875, "learning_rate": 7.28e-07, "loss": 0.0099, "num_tokens": 15905596.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9600626230239868, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06784530403272701, "rewards/wordcountpos_reward/raw_geo/std": 0.06333157851365523, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1283.1875, "completions/mean_terminated_length": 1233.1539306640625, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.07321464292858572, "frac_reward_zero_std": 0.0, "grad_norm": 3.226000283354174, "kl": 0.00252532958984375, "learning_rate": 7.3e-07, "loss": -0.0068, "num_tokens": 15950519.0, "reward": 0.0, "reward_std": 0.8436201214790344, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04730227003181049, "rewards/wordcountpos_reward/raw_geo/std": 0.054344553171499776, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1273.1875, "completions/mean_terminated_length": 1240.7857666015625, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.07341468293658732, "frac_reward_zero_std": 0.0, "grad_norm": 2.6199039133576987, "kl": 0.0017242431640625, "learning_rate": 7.319999999999999e-07, "loss": 0.0066, "num_tokens": 15993714.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5935419201850891, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23694796349751493, "rewards/wordcountpos_reward/raw_geo/std": 0.38556423279124885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1179.0, "completions/mean_terminated_length": 1179.0, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.07361472294458891, "frac_reward_zero_std": 0.0, "grad_norm": 1.6552059955084237, "kl": 0.0009813308715820312, "learning_rate": 7.34e-07, "loss": -0.0057, "num_tokens": 16030034.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9800270199775696, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013932727343423858, "rewards/wordcountpos_reward/raw_geo/std": 0.09059217439165791, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1189.625, "completions/mean_terminated_length": 1118.0, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.07381476295259051, "frac_reward_zero_std": 0.0, "grad_norm": 2.464919564534785, "kl": 0.001739501953125, "learning_rate": 7.359999999999999e-07, "loss": 0.0331, "num_tokens": 16081180.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9230073690414429, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.215495302357515, "rewards/wordcountpos_reward/raw_geo/std": 0.14489154310100222, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16865480854231357, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 844.0, "completions/max_terminated_length": 844.0, "completions/mean_length": 729.6875, "completions/mean_terminated_length": 729.6875, "completions/min_length": 563.0, "completions/min_terminated_length": 563.0, "epoch": 0.07401480296059212, "frac_reward_zero_std": 0.0, "grad_norm": 2.7724744810969266, "kl": 0.0010919570922851562, "learning_rate": 7.38e-07, "loss": -0.0378, "num_tokens": 16107423.0, "reward": -2.60770320892334e-08, "reward_std": 1.0266315937042236, "rewards/wordcountpos_reward/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004095394133707565, "rewards/wordcountpos_reward/raw_geo/std": 0.10111930300139639, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.107496769977314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1082.0, "completions/mean_terminated_length": 1082.0, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.07421484296859372, "frac_reward_zero_std": 0.0, "grad_norm": 3.337922187398566, "kl": 0.00258636474609375, "learning_rate": 7.4e-07, "loss": -0.0056, "num_tokens": 16146495.0, "reward": 7.450580596923828e-09, "reward_std": 0.9355493187904358, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08420984558230026, "rewards/wordcountpos_reward/raw_geo/std": 0.07092232973915294, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363346, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1154.4375, "completions/mean_terminated_length": 1154.4375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.07441488297659532, "frac_reward_zero_std": 0.0, "grad_norm": 3.4966790057141486, "kl": 0.0025482177734375, "learning_rate": 7.42e-07, "loss": -0.0283, "num_tokens": 16179726.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8922228813171387, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16021923976249017, "rewards/wordcountpos_reward/raw_geo/std": 0.16667407594414502, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1206.1875, "completions/mean_terminated_length": 1206.1875, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.07461492298459692, "frac_reward_zero_std": 0.5, "grad_norm": 2.4432796827890972, "kl": 0.0020771026611328125, "learning_rate": 7.44e-07, "loss": -0.0247, "num_tokens": 16222353.0, "reward": 0.0, "reward_std": 0.17276452481746674, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/std": 0.0, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.17299111516469837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1091.9375, "completions/mean_terminated_length": 1064.7333984375, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.07481496299259852, "frac_reward_zero_std": 0.0, "grad_norm": 3.5947584572137843, "kl": 0.0023651123046875, "learning_rate": 7.459999999999999e-07, "loss": -0.0623, "num_tokens": 16261392.0, "reward": 0.0, "reward_std": 0.694343090057373, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03110712424574219, "rewards/wordcountpos_reward/raw_geo/std": 0.07291467283236489, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1297.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 1103.25, "completions/mean_terminated_length": 1103.25, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.07501500300060011, "frac_reward_zero_std": 0.0, "grad_norm": 3.6920906931805635, "kl": 0.002742767333984375, "learning_rate": 7.48e-07, "loss": 0.0046, "num_tokens": 16301604.0, "reward": -7.450580596923828e-09, "reward_std": 1.005752682685852, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0416080213652988, "rewards/wordcountpos_reward/raw_geo/std": 0.16309424007497925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.0885061203156784, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1203.625, "completions/mean_terminated_length": 1183.86669921875, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.07521504300860173, "frac_reward_zero_std": 0.0, "grad_norm": 3.3545317210242334, "kl": 0.0030364990234375, "learning_rate": 7.5e-07, "loss": -0.0152, "num_tokens": 16355902.0, "reward": 0.0, "reward_std": 0.5474947690963745, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1279259424335897, "rewards/wordcountpos_reward/raw_geo/std": 0.05761335791497305, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.13977495139343474, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1027.0, "completions/max_terminated_length": 1027.0, "completions/mean_length": 836.5, "completions/mean_terminated_length": 836.5, "completions/min_length": 479.0, "completions/min_terminated_length": 479.0, "epoch": 0.07541508301660332, "frac_reward_zero_std": 0.0, "grad_norm": 3.6670445297461014, "kl": 0.002223968505859375, "learning_rate": 7.52e-07, "loss": -0.059, "num_tokens": 16406710.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8515207171440125, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013081117936095111, "rewards/wordcountpos_reward/raw_geo/std": 0.20368873903382947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1355373393953503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1415.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1107.5, "completions/mean_terminated_length": 1107.5, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.07561512302460492, "frac_reward_zero_std": 0.0, "grad_norm": 2.3727977004602265, "kl": 0.00145721435546875, "learning_rate": 7.54e-07, "loss": 0.0302, "num_tokens": 16442646.0, "reward": 5.960464477539063e-08, "reward_std": 0.5576227903366089, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03322198957390813, "rewards/wordcountpos_reward/raw_geo/std": 0.05738959065309491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 927.0, "completions/max_terminated_length": 927.0, "completions/mean_length": 859.875, "completions/mean_terminated_length": 859.875, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.07581516303260652, "frac_reward_zero_std": 0.0, "grad_norm": 3.697805428091899, "kl": 0.002292633056640625, "learning_rate": 7.559999999999999e-07, "loss": -0.0114, "num_tokens": 16481564.0, "reward": 7.450580596923828e-09, "reward_std": 1.017039179801941, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.05201075129225997, "rewards/wordcountpos_reward/raw_geo/std": 0.13288906275971712, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717428, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 1062.8125, "completions/mean_terminated_length": 1062.8125, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.07601520304060812, "frac_reward_zero_std": 0.0, "grad_norm": 3.5022729325806448, "kl": 0.0028076171875, "learning_rate": 7.58e-07, "loss": 0.0008, "num_tokens": 16530209.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0048627853393555, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2779941087806745, "rewards/wordcountpos_reward/raw_geo/std": 0.15260954899620252, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 962.25, "completions/mean_terminated_length": 962.25, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.07621524304860972, "frac_reward_zero_std": 0.0, "grad_norm": 2.9682405188518723, "kl": 0.001377105712890625, "learning_rate": 7.599999999999999e-07, "loss": -0.0374, "num_tokens": 16565373.0, "reward": -3.725290298461914e-09, "reward_std": 1.0429153442382812, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.15955278935526343, "rewards/wordcountpos_reward/raw_geo/std": 0.2711222972079778, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 995.6875, "completions/mean_terminated_length": 995.6875, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.07641528305661133, "frac_reward_zero_std": 0.0, "grad_norm": 4.051261841121727, "kl": 0.002964019775390625, "learning_rate": 7.62e-07, "loss": -0.013, "num_tokens": 16604600.0, "reward": 2.9802322387695312e-08, "reward_std": 0.733407735824585, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.030587188553242783, "rewards/wordcountpos_reward/raw_geo/std": 0.14480982093759714, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7375, "rewards/wordcountpos_reward/raw_rule/std": 0.1567612007930345, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1328.0, "completions/mean_length": 1141.625, "completions/mean_terminated_length": 1058.923095703125, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.07661532306461293, "frac_reward_zero_std": 0.0, "grad_norm": 3.1291709283741174, "kl": 0.002391815185546875, "learning_rate": 7.64e-07, "loss": 0.0193, "num_tokens": 16649978.0, "reward": -1.4901161193847656e-08, "reward_std": 1.06006920337677, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19834771329243037, "rewards/wordcountpos_reward/raw_geo/std": 0.051849302348137095, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1071.1875, "completions/mean_terminated_length": 1071.1875, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.07681536307261452, "frac_reward_zero_std": 0.0, "grad_norm": 3.155979852848612, "kl": 0.00244903564453125, "learning_rate": 7.66e-07, "loss": -0.0006, "num_tokens": 16693333.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9485359191894531, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08430169140353466, "rewards/wordcountpos_reward/raw_geo/std": 0.08878377427896136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252809, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1277.9375, "completions/mean_terminated_length": 1226.6923828125, "completions/min_length": 1042.0, "completions/min_terminated_length": 1042.0, "epoch": 0.07701540308061612, "frac_reward_zero_std": 0.0, "grad_norm": 3.4938680489508194, "kl": 0.00286102294921875, "learning_rate": 7.68e-07, "loss": -0.0001, "num_tokens": 16741652.0, "reward": 0.0, "reward_std": 0.49249571561813354, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.016940986172604366, "rewards/wordcountpos_reward/raw_geo/std": 0.14560916546917568, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1881193474602995, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1225.4375, "completions/mean_terminated_length": 1207.1334228515625, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.07721544308861772, "frac_reward_zero_std": 0.0, "grad_norm": 3.2066611808788124, "kl": 0.00226593017578125, "learning_rate": 7.699999999999999e-07, "loss": 0.0304, "num_tokens": 16779883.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0592622756958008, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0827675471542851, "rewards/wordcountpos_reward/raw_geo/std": 0.123529620953116, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 1060.75, "completions/mean_terminated_length": 1031.4666748046875, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.07741548309661932, "frac_reward_zero_std": 0.0, "grad_norm": 2.9810427562465636, "kl": 0.0017032623291015625, "learning_rate": 7.72e-07, "loss": -0.0, "num_tokens": 16825695.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9820472002029419, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010793569474357844, "rewards/wordcountpos_reward/raw_geo/std": 0.05924348082856902, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1031.1875, "completions/mean_terminated_length": 1031.1875, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.07761552310462093, "frac_reward_zero_std": 0.0, "grad_norm": 2.554974134488524, "kl": 0.0013971328735351562, "learning_rate": 7.74e-07, "loss": -0.0251, "num_tokens": 16862946.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0310511589050293, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026319768042349218, "rewards/wordcountpos_reward/raw_geo/std": 0.036495620418145504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 1071.125, "completions/mean_terminated_length": 1071.125, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.07781556311262253, "frac_reward_zero_std": 0.0, "grad_norm": 2.593683306278425, "kl": 0.0015869140625, "learning_rate": 7.76e-07, "loss": 0.0171, "num_tokens": 16897148.0, "reward": -2.9802322387695312e-08, "reward_std": 0.48925772309303284, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013115534997961788, "rewards/wordcountpos_reward/raw_geo/std": 0.16101667523872967, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852978, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1036.25, "completions/mean_terminated_length": 1036.25, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.07801560312062412, "frac_reward_zero_std": 0.0, "grad_norm": 3.419498859244468, "kl": 0.00218963623046875, "learning_rate": 7.78e-07, "loss": -0.0414, "num_tokens": 16938296.0, "reward": 0.0, "reward_std": 0.6891583204269409, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09224928843248108, "rewards/wordcountpos_reward/raw_geo/std": 0.0882766204011879, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1305.6875, "completions/mean_terminated_length": 1277.9285888671875, "completions/min_length": 1070.0, "completions/min_terminated_length": 1070.0, "epoch": 0.07821564312862572, "frac_reward_zero_std": 0.0, "grad_norm": 2.156767519126892, "kl": 0.0014171600341796875, "learning_rate": 7.799999999999999e-07, "loss": 0.0272, "num_tokens": 16978179.0, "reward": -2.9802322387695312e-08, "reward_std": 0.3249605894088745, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007760756467150007, "rewards/wordcountpos_reward/raw_geo/std": 0.11473095785869254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036264, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1296.6875, "completions/mean_terminated_length": 1249.769287109375, "completions/min_length": 1096.0, "completions/min_terminated_length": 1096.0, "epoch": 0.07841568313662732, "frac_reward_zero_std": 0.0, "grad_norm": 3.141433864872489, "kl": 0.002628326416015625, "learning_rate": 7.82e-07, "loss": -0.0033, "num_tokens": 17023382.0, "reward": 0.0, "reward_std": 0.8168051242828369, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13975184520123504, "rewards/wordcountpos_reward/raw_geo/std": 0.1701501706722421, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988657014, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1218.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 954.5, "completions/mean_terminated_length": 954.5, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.07861572314462893, "frac_reward_zero_std": 0.0, "grad_norm": 3.4281220819388367, "kl": 0.0023212432861328125, "learning_rate": 7.84e-07, "loss": -0.0243, "num_tokens": 17062486.0, "reward": 0.0, "reward_std": 1.0324459075927734, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14868802691353358, "rewards/wordcountpos_reward/raw_geo/std": 0.06756757576121707, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1189.3125, "completions/mean_terminated_length": 1168.60009765625, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.07881576315263053, "frac_reward_zero_std": 0.0, "grad_norm": 3.4411843215430924, "kl": 0.002674102783203125, "learning_rate": 7.86e-07, "loss": 0.0152, "num_tokens": 17115011.0, "reward": -2.9802322387695312e-08, "reward_std": 1.044392704963684, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010901298165659726, "rewards/wordcountpos_reward/raw_geo/std": 0.070686649989382, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.16903867626692443, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1070.375, "completions/mean_terminated_length": 1041.7333984375, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.07901580316063213, "frac_reward_zero_std": 0.0, "grad_norm": 3.698409341036159, "kl": 0.002765655517578125, "learning_rate": 7.88e-07, "loss": 0.0133, "num_tokens": 17153169.0, "reward": 2.2351741790771484e-08, "reward_std": 1.011796474456787, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15778116853439067, "rewards/wordcountpos_reward/raw_geo/std": 0.1135791091307533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 962.0625, "completions/mean_terminated_length": 962.0625, "completions/min_length": 650.0, "completions/min_terminated_length": 650.0, "epoch": 0.07921584316863373, "frac_reward_zero_std": 0.0, "grad_norm": 4.266535853506466, "kl": 0.003360748291015625, "learning_rate": 7.9e-07, "loss": -0.0682, "num_tokens": 17202370.0, "reward": 0.0, "reward_std": 0.9920775294303894, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.32518239042982766, "rewards/wordcountpos_reward/raw_geo/std": 0.09599094127420793, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1198.8125, "completions/mean_terminated_length": 1198.8125, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.07941588317663532, "frac_reward_zero_std": 0.0, "grad_norm": 2.7291319741433715, "kl": 0.0017671585083007812, "learning_rate": 7.92e-07, "loss": -0.0227, "num_tokens": 17251183.0, "reward": 0.0, "reward_std": 0.8251668214797974, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11715859467007739, "rewards/wordcountpos_reward/raw_geo/std": 0.3571080445410963, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1059.5625, "completions/mean_terminated_length": 1059.5625, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.07961592318463692, "frac_reward_zero_std": 0.0, "grad_norm": 3.567656293324913, "kl": 0.003063201904296875, "learning_rate": 7.94e-07, "loss": 0.0143, "num_tokens": 17291760.0, "reward": 0.0, "reward_std": 0.802862286567688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10289364811521635, "rewards/wordcountpos_reward/raw_geo/std": 0.09350648310008419, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125757, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1042.6875, "completions/mean_terminated_length": 1042.6875, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.07981596319263853, "frac_reward_zero_std": 0.0, "grad_norm": 2.80989495743674, "kl": 0.0022687911987304688, "learning_rate": 7.96e-07, "loss": -0.0083, "num_tokens": 17340731.0, "reward": 0.0, "reward_std": 0.7733708620071411, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.022759323063843154, "rewards/wordcountpos_reward/raw_geo/std": 0.180026115102723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13743685418725538, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1053.1875, "completions/mean_terminated_length": 1023.4000244140625, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.08001600320064013, "frac_reward_zero_std": 0.0, "grad_norm": 3.144527936613555, "kl": 0.0021305084228515625, "learning_rate": 7.98e-07, "loss": 0.023, "num_tokens": 17385518.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8499571084976196, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11949830124805697, "rewards/wordcountpos_reward/raw_geo/std": 0.2666749306717188, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1189.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 979.4375, "completions/mean_terminated_length": 979.4375, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.08021604320864173, "frac_reward_zero_std": 0.0, "grad_norm": 3.508709941120798, "kl": 0.0024261474609375, "learning_rate": 8e-07, "loss": -0.0329, "num_tokens": 17429069.0, "reward": 0.0, "reward_std": 0.43606865406036377, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2479166716961376, "rewards/wordcountpos_reward/raw_geo/std": 0.179032899543126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.15098442401882486, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 1094.5625, "completions/mean_terminated_length": 1094.5625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.08041608321664333, "frac_reward_zero_std": 0.0, "grad_norm": 2.327948427968791, "kl": 0.0014133453369140625, "learning_rate": 8.02e-07, "loss": -0.0167, "num_tokens": 17470750.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7718532681465149, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07753804506847807, "rewards/wordcountpos_reward/raw_geo/std": 0.09357421607500979, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279466, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1106.4375, "completions/mean_terminated_length": 1106.4375, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.08061612322464493, "frac_reward_zero_std": 0.0, "grad_norm": 3.3431244094197554, "kl": 0.0024871826171875, "learning_rate": 8.04e-07, "loss": 0.0214, "num_tokens": 17515269.0, "reward": -5.960464477539063e-08, "reward_std": 0.7134137153625488, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08792860096913051, "rewards/wordcountpos_reward/raw_geo/std": 0.04886506479392634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.14298407059684812, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1293.0, "completions/max_terminated_length": 1293.0, "completions/mean_length": 1047.5625, "completions/mean_terminated_length": 1047.5625, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.08081616323264652, "frac_reward_zero_std": 0.0, "grad_norm": 2.168145350955324, "kl": 0.0009412765502929688, "learning_rate": 8.06e-07, "loss": -0.0164, "num_tokens": 17555086.0, "reward": 0.0, "reward_std": 0.6112060546875, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.26590774376568715, "rewards/wordcountpos_reward/raw_geo/std": 0.14494511802659826, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1379.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 899.375, "completions/mean_terminated_length": 899.375, "completions/min_length": 517.0, "completions/min_terminated_length": 517.0, "epoch": 0.08101620324064814, "frac_reward_zero_std": 0.0, "grad_norm": 3.2491245033249765, "kl": 0.0017833709716796875, "learning_rate": 8.08e-07, "loss": 0.0246, "num_tokens": 17584236.0, "reward": 0.0, "reward_std": 0.7021193504333496, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.031133583219200484, "rewards/wordcountpos_reward/raw_geo/std": 0.10159549543044888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1122.4375, "completions/mean_terminated_length": 1097.2667236328125, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.08121624324864973, "frac_reward_zero_std": 0.0, "grad_norm": 2.6370857377470136, "kl": 0.00182342529296875, "learning_rate": 8.1e-07, "loss": 0.0206, "num_tokens": 17625963.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0228652954101562, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.377113685405611, "rewards/wordcountpos_reward/raw_geo/std": 0.1660329168743078, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1161.9375, "completions/mean_terminated_length": 1161.9375, "completions/min_length": 1063.0, "completions/min_terminated_length": 1063.0, "epoch": 0.08141628325665133, "frac_reward_zero_std": 0.5, "grad_norm": 0.6630523529996382, "kl": 0.0002644062042236328, "learning_rate": 8.12e-07, "loss": -0.0026, "num_tokens": 17670226.0, "reward": 1.862645149230957e-09, "reward_std": 0.7558939456939697, "rewards/wordcountpos_reward/mean": 1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07028589862331452, "rewards/wordcountpos_reward/raw_geo/std": 0.08567249499400444, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 1095.625, "completions/mean_terminated_length": 1095.625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.08161632326465293, "frac_reward_zero_std": 0.0, "grad_norm": 2.4742911999419115, "kl": 0.0014219284057617188, "learning_rate": 8.14e-07, "loss": -0.014, "num_tokens": 17704148.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9453210830688477, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04634748267184062, "rewards/wordcountpos_reward/raw_geo/std": 0.03990545299577224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1060.875, "completions/mean_terminated_length": 1031.60009765625, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.08181636327265453, "frac_reward_zero_std": 0.0, "grad_norm": 3.4905110186322834, "kl": 0.002685546875, "learning_rate": 8.159999999999999e-07, "loss": -0.0254, "num_tokens": 17759058.0, "reward": 0.0, "reward_std": 0.5542085766792297, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.038401116124592335, "rewards/wordcountpos_reward/raw_geo/std": 0.06764465109504178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.15000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1107.4375, "completions/mean_terminated_length": 1107.4375, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.08201640328065612, "frac_reward_zero_std": 0.0, "grad_norm": 3.6040708015111025, "kl": 0.003147125244140625, "learning_rate": 8.179999999999999e-07, "loss": 0.0323, "num_tokens": 17799313.0, "reward": 0.0, "reward_std": 0.7635582685470581, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0743550991284492, "rewards/wordcountpos_reward/raw_geo/std": 0.21166073809703154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 897.9375, "completions/mean_terminated_length": 897.9375, "completions/min_length": 374.0, "completions/min_terminated_length": 374.0, "epoch": 0.08221644328865774, "frac_reward_zero_std": 0.0, "grad_norm": 2.1855816161929686, "kl": 0.001605987548828125, "learning_rate": 8.199999999999999e-07, "loss": -0.0926, "num_tokens": 17847680.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7678371667861938, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15831153786108407, "rewards/wordcountpos_reward/raw_geo/std": 0.11065331094720791, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1641476300299351, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1223.0, "completions/max_terminated_length": 1223.0, "completions/mean_length": 1030.0625, "completions/mean_terminated_length": 1030.0625, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.08241648329665933, "frac_reward_zero_std": 0.0, "grad_norm": 3.6285630837492997, "kl": 0.0033416748046875, "learning_rate": 8.219999999999999e-07, "loss": -0.021, "num_tokens": 17886993.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0661065578460693, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04436963539968915, "rewards/wordcountpos_reward/raw_geo/std": 0.035825588739241035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1025.3125, "completions/mean_terminated_length": 1025.3125, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.08261652330466093, "frac_reward_zero_std": 0.0, "grad_norm": 3.7347910278745347, "kl": 0.00293731689453125, "learning_rate": 8.24e-07, "loss": 0.0178, "num_tokens": 17924366.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6692298650741577, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13173742005776823, "rewards/wordcountpos_reward/raw_geo/std": 0.08107770013541364, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13924399049470282, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1143.6875, "completions/mean_terminated_length": 1143.6875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.08281656331266253, "frac_reward_zero_std": 0.0, "grad_norm": 2.863739208362909, "kl": 0.00208282470703125, "learning_rate": 8.259999999999999e-07, "loss": 0.0021, "num_tokens": 17967657.0, "reward": -2.9802322387695312e-08, "reward_std": 0.439179390668869, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01590819878941648, "rewards/wordcountpos_reward/raw_geo/std": 0.15021720362942084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1010.0625, "completions/mean_terminated_length": 1010.0625, "completions/min_length": 679.0, "completions/min_terminated_length": 679.0, "epoch": 0.08301660332066413, "frac_reward_zero_std": 0.0, "grad_norm": 3.018415832433584, "kl": 0.0016231536865234375, "learning_rate": 8.28e-07, "loss": 0.019, "num_tokens": 18016514.0, "reward": -7.450580596923828e-09, "reward_std": 1.0669922828674316, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0885631528219577, "rewards/wordcountpos_reward/raw_geo/std": 0.06570832073225384, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1113.0, "completions/mean_length": 1300.25, "completions/mean_terminated_length": 1100.5, "completions/min_length": 1081.0, "completions/min_terminated_length": 1081.0, "epoch": 0.08321664332866573, "frac_reward_zero_std": 0.0, "grad_norm": 2.497882683419577, "kl": 0.0019235610961914062, "learning_rate": 8.299999999999999e-07, "loss": -0.0316, "num_tokens": 18059894.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9635406732559204, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016915762273923017, "rewards/wordcountpos_reward/raw_geo/std": 0.09011074580858566, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16307235385739852, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1115.3125, "completions/mean_terminated_length": 1115.3125, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.08341668333666734, "frac_reward_zero_std": 0.0, "grad_norm": 3.174205009600204, "kl": 0.002552032470703125, "learning_rate": 8.319999999999999e-07, "loss": 0.0017, "num_tokens": 18101019.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0140455961227417, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.036933062777199335, "rewards/wordcountpos_reward/raw_geo/std": 0.06099117174755285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1334.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 1081.125, "completions/mean_terminated_length": 1081.125, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.08361672334466894, "frac_reward_zero_std": 0.0, "grad_norm": 2.454527432100527, "kl": 0.0016956329345703125, "learning_rate": 8.34e-07, "loss": -0.0485, "num_tokens": 18137725.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6814285516738892, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09292350969956907, "rewards/wordcountpos_reward/raw_geo/std": 0.16834987222546696, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.17018508443151817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1172.125, "completions/mean_terminated_length": 1150.2667236328125, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.08381676335267053, "frac_reward_zero_std": 0.0, "grad_norm": 2.722696306391821, "kl": 0.00156402587890625, "learning_rate": 8.359999999999999e-07, "loss": -0.05, "num_tokens": 18186975.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0589263439178467, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0050790527715392254, "rewards/wordcountpos_reward/raw_geo/std": 0.0694348774419158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1182.625, "completions/mean_terminated_length": 1137.2857666015625, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.08401680336067213, "frac_reward_zero_std": 0.0, "grad_norm": 3.3174118662944974, "kl": 0.00286102294921875, "learning_rate": 8.38e-07, "loss": 0.0217, "num_tokens": 18231665.0, "reward": 0.0, "reward_std": 0.7940347194671631, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09525078176517329, "rewards/wordcountpos_reward/raw_geo/std": 0.10568430602401672, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965647, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1253.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 1062.75, "completions/mean_terminated_length": 1062.75, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.08421684336867373, "frac_reward_zero_std": 0.0, "grad_norm": 2.9858728858312538, "kl": 0.0021114349365234375, "learning_rate": 8.399999999999999e-07, "loss": -0.036, "num_tokens": 18282037.0, "reward": -1.30385160446167e-08, "reward_std": 1.0569286346435547, "rewards/wordcountpos_reward/mean": -1.30385160446167e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09440094520792518, "rewards/wordcountpos_reward/raw_geo/std": 0.03961531189437775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05900408021045224, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1157.4375, "completions/mean_terminated_length": 1134.60009765625, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.08441688337667534, "frac_reward_zero_std": 0.0, "grad_norm": 3.1243534879094694, "kl": 0.002193450927734375, "learning_rate": 8.419999999999999e-07, "loss": -0.0037, "num_tokens": 18325220.0, "reward": 0.0, "reward_std": 0.9127192497253418, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04266925463173475, "rewards/wordcountpos_reward/raw_geo/std": 0.056298800110485235, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1163.1875, "completions/mean_terminated_length": 1163.1875, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.08461692338467694, "frac_reward_zero_std": 0.0, "grad_norm": 2.1314119505993783, "kl": 0.0006833076477050781, "learning_rate": 8.439999999999999e-07, "loss": -0.0299, "num_tokens": 18362383.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7901430130004883, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06132893237799952, "rewards/wordcountpos_reward/raw_geo/std": 0.1512981953109685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1318.0625, "completions/mean_terminated_length": 1257.416748046875, "completions/min_length": 1175.0, "completions/min_terminated_length": 1175.0, "epoch": 0.08481696339267854, "frac_reward_zero_std": 0.0, "grad_norm": 2.1400305966241824, "kl": 0.0016889572143554688, "learning_rate": 8.459999999999999e-07, "loss": -0.018, "num_tokens": 18410776.0, "reward": 0.0, "reward_std": 1.0197169780731201, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027326407128577597, "rewards/wordcountpos_reward/raw_geo/std": 0.045678254096492704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1087.3125, "completions/mean_terminated_length": 1087.3125, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.08501700340068014, "frac_reward_zero_std": 0.0, "grad_norm": 3.5559784916879056, "kl": 0.003215789794921875, "learning_rate": 8.48e-07, "loss": 0.0016, "num_tokens": 18460037.0, "reward": 2.9802322387695312e-08, "reward_std": 0.620617151260376, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15150400127886596, "rewards/wordcountpos_reward/raw_geo/std": 0.22420005285980008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.16843506277010845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 845.5625, "completions/mean_terminated_length": 845.5625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.08521704340868173, "frac_reward_zero_std": 0.0, "grad_norm": 4.166213227779742, "kl": 0.00321197509765625, "learning_rate": 8.499999999999999e-07, "loss": -0.0762, "num_tokens": 18495766.0, "reward": 0.0, "reward_std": 0.66336989402771, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0876084842089189, "rewards/wordcountpos_reward/raw_geo/std": 0.15112486218954263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1103.8125, "completions/mean_terminated_length": 1103.8125, "completions/min_length": 653.0, "completions/min_terminated_length": 653.0, "epoch": 0.08541708341668333, "frac_reward_zero_std": 0.0, "grad_norm": 3.2879679226867595, "kl": 0.002384185791015625, "learning_rate": 8.52e-07, "loss": 0.0132, "num_tokens": 18533379.0, "reward": 0.0, "reward_std": 0.9046326279640198, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0975877822923059, "rewards/wordcountpos_reward/raw_geo/std": 0.2373126395944559, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316813, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1320.0, "completions/mean_terminated_length": 1278.4615478515625, "completions/min_length": 1098.0, "completions/min_terminated_length": 1098.0, "epoch": 0.08561712342468494, "frac_reward_zero_std": 0.0, "grad_norm": 3.1621827070877644, "kl": 0.002986907958984375, "learning_rate": 8.539999999999999e-07, "loss": 0.0269, "num_tokens": 18587243.0, "reward": 0.0, "reward_std": 0.956751823425293, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2725917107765537, "rewards/wordcountpos_reward/raw_geo/std": 0.3118636050900945, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1051.875, "completions/mean_terminated_length": 1051.875, "completions/min_length": 570.0, "completions/min_terminated_length": 570.0, "epoch": 0.08581716343268654, "frac_reward_zero_std": 0.0, "grad_norm": 3.30993313583991, "kl": 0.0030975341796875, "learning_rate": 8.559999999999999e-07, "loss": -0.0733, "num_tokens": 18631649.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0577553510665894, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07263579775687691, "rewards/wordcountpos_reward/raw_geo/std": 0.06826025246986418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11925695879998881, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1235.6875, "completions/mean_terminated_length": 1147.5833740234375, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.08601720344068814, "frac_reward_zero_std": 0.0, "grad_norm": 3.299305001474713, "kl": 0.00311279296875, "learning_rate": 8.58e-07, "loss": -0.0487, "num_tokens": 18675052.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8397456407546997, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06779078550927821, "rewards/wordcountpos_reward/raw_geo/std": 0.06631056658891393, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1123.9375, "completions/mean_terminated_length": 1070.21435546875, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.08621724344868974, "frac_reward_zero_std": 0.0, "grad_norm": 3.2689711595423154, "kl": 0.0028076171875, "learning_rate": 8.599999999999999e-07, "loss": -0.0196, "num_tokens": 18714307.0, "reward": 0.0, "reward_std": 0.9198898077011108, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.029487683276100202, "rewards/wordcountpos_reward/raw_geo/std": 0.16314352642400448, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15682025568335423, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1083.0, "completions/mean_terminated_length": 1083.0, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.08641728345669134, "frac_reward_zero_std": 0.0, "grad_norm": 2.580099266180246, "kl": 0.001789093017578125, "learning_rate": 8.62e-07, "loss": -0.0264, "num_tokens": 18754699.0, "reward": 0.0, "reward_std": 0.5988175272941589, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0619943848886873, "rewards/wordcountpos_reward/raw_geo/std": 0.190361213697201, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1308.4375, "completions/mean_terminated_length": 1193.5, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.08661732346469293, "frac_reward_zero_std": 0.0, "grad_norm": 3.3061758103046195, "kl": 0.002994537353515625, "learning_rate": 8.639999999999999e-07, "loss": -0.0086, "num_tokens": 18807722.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8916733264923096, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1431637622634425, "rewards/wordcountpos_reward/raw_geo/std": 0.055878666223157104, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1197992147380435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1209.25, "completions/mean_terminated_length": 1077.0909423828125, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.08681736347269454, "frac_reward_zero_std": 0.0, "grad_norm": 3.3136994035699843, "kl": 0.00252532958984375, "learning_rate": 8.659999999999999e-07, "loss": 0.0123, "num_tokens": 18856574.0, "reward": 0.0, "reward_std": 0.6713405847549438, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03645705260354844, "rewards/wordcountpos_reward/raw_geo/std": 0.08884293456383821, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.133263870794973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1005.25, "completions/mean_terminated_length": 1005.25, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.08701740348069614, "frac_reward_zero_std": 0.0, "grad_norm": 3.739858934924631, "kl": 0.0022830963134765625, "learning_rate": 8.68e-07, "loss": -0.0203, "num_tokens": 18887154.0, "reward": 0.0, "reward_std": 0.8702331781387329, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.003987106773799781, "rewards/wordcountpos_reward/raw_geo/std": 0.04258197403398292, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1306.4375, "completions/mean_terminated_length": 1261.769287109375, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.08721744348869774, "frac_reward_zero_std": 0.0, "grad_norm": 3.2976868680562763, "kl": 0.003086090087890625, "learning_rate": 8.699999999999999e-07, "loss": -0.0189, "num_tokens": 18939729.0, "reward": 0.0, "reward_std": 1.0671062469482422, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3183368034364289, "rewards/wordcountpos_reward/raw_geo/std": 0.29246018487084263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1095.0, "completions/mean_terminated_length": 1095.0, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.08741748349669934, "frac_reward_zero_std": 0.0, "grad_norm": 3.0685868225747104, "kl": 0.0023365020751953125, "learning_rate": 8.72e-07, "loss": -0.0397, "num_tokens": 18988665.0, "reward": -5.960464477539063e-08, "reward_std": 0.3992195129394531, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07773019844889655, "rewards/wordcountpos_reward/raw_geo/std": 0.17176865459540083, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1108.625, "completions/mean_terminated_length": 1018.3077392578125, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.08761752350470094, "frac_reward_zero_std": 0.0, "grad_norm": 3.046517645830191, "kl": 0.002349853515625, "learning_rate": 8.739999999999999e-07, "loss": -0.0278, "num_tokens": 19028155.0, "reward": 0.0, "reward_std": 0.9707126617431641, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.037594242635203765, "rewards/wordcountpos_reward/raw_geo/std": 0.11029408568025839, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1271.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1063.0625, "completions/mean_terminated_length": 1063.0625, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.08781756351270253, "frac_reward_zero_std": 0.0, "grad_norm": 2.4351577943229126, "kl": 0.0015048980712890625, "learning_rate": 8.76e-07, "loss": -0.0262, "num_tokens": 19071540.0, "reward": 0.0, "reward_std": 0.6890659332275391, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3749114757417403, "rewards/wordcountpos_reward/raw_geo/std": 0.3589918747478618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1210.5, "completions/mean_terminated_length": 1114.0, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.08801760352070415, "frac_reward_zero_std": 0.0, "grad_norm": 3.180666596159949, "kl": 0.002948760986328125, "learning_rate": 8.78e-07, "loss": 0.0119, "num_tokens": 19115948.0, "reward": 0.0, "reward_std": 0.8787262439727783, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027289230439799333, "rewards/wordcountpos_reward/raw_geo/std": 0.180860792186217, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1600347184554374, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1135.75, "completions/mean_terminated_length": 1135.75, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.08821764352870574, "frac_reward_zero_std": 0.0, "grad_norm": 3.201098455785099, "kl": 0.003116607666015625, "learning_rate": 8.799999999999999e-07, "loss": -0.0239, "num_tokens": 19166248.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7383455634117126, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10906757176808307, "rewards/wordcountpos_reward/raw_geo/std": 0.10850857774550682, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1266.6875, "completions/mean_terminated_length": 1188.916748046875, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.08841768353670734, "frac_reward_zero_std": 0.0, "grad_norm": 3.244916872938238, "kl": 0.003330230712890625, "learning_rate": 8.82e-07, "loss": -0.0236, "num_tokens": 19219771.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5060003995895386, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05112165519476513, "rewards/wordcountpos_reward/raw_geo/std": 0.08143172569982593, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 987.25, "completions/mean_terminated_length": 987.25, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.08861772354470894, "frac_reward_zero_std": 0.0, "grad_norm": 3.801227701350928, "kl": 0.00263214111328125, "learning_rate": 8.839999999999999e-07, "loss": -0.0038, "num_tokens": 19259527.0, "reward": -5.960464477539063e-08, "reward_std": 0.864219069480896, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.059527251674027566, "rewards/wordcountpos_reward/raw_geo/std": 0.09559034088448276, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 930.4375, "completions/mean_terminated_length": 930.4375, "completions/min_length": 516.0, "completions/min_terminated_length": 516.0, "epoch": 0.08881776355271054, "frac_reward_zero_std": 0.0, "grad_norm": 3.4880641149596223, "kl": 0.00276947021484375, "learning_rate": 8.86e-07, "loss": -0.0167, "num_tokens": 19299238.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7947709560394287, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15473765789872287, "rewards/wordcountpos_reward/raw_geo/std": 0.22052653973512842, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1281.9375, "completions/mean_terminated_length": 1231.615478515625, "completions/min_length": 1079.0, "completions/min_terminated_length": 1079.0, "epoch": 0.08901780356071214, "frac_reward_zero_std": 0.0, "grad_norm": 3.4593938441905485, "kl": 0.00312042236328125, "learning_rate": 8.88e-07, "loss": -0.0204, "num_tokens": 19345069.0, "reward": 0.0, "reward_std": 0.7192075252532959, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.060805674299536865, "rewards/wordcountpos_reward/raw_geo/std": 0.1540085555548371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 1106.9375, "completions/mean_terminated_length": 1016.2308349609375, "completions/min_length": 706.0, "completions/min_terminated_length": 706.0, "epoch": 0.08921784356871375, "frac_reward_zero_std": 0.0, "grad_norm": 2.3577051480288223, "kl": 0.0016155242919921875, "learning_rate": 8.9e-07, "loss": 0.0156, "num_tokens": 19393796.0, "reward": 0.0, "reward_std": 0.8608711957931519, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10506878252243318, "rewards/wordcountpos_reward/raw_geo/std": 0.07971488671181962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1078.1875, "completions/mean_terminated_length": 1050.0667724609375, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.08941788357671535, "frac_reward_zero_std": 0.0, "grad_norm": 3.7693851524506212, "kl": 0.0038604736328125, "learning_rate": 8.92e-07, "loss": -0.0196, "num_tokens": 19446863.0, "reward": 5.587935447692871e-09, "reward_std": 1.0194907188415527, "rewards/wordcountpos_reward/mean": 5.587935447692871e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.23204758343737142, "rewards/wordcountpos_reward/raw_geo/std": 0.263949380435127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1207.3125, "completions/mean_terminated_length": 1109.75, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.08961792358471694, "frac_reward_zero_std": 0.0, "grad_norm": 2.9068800631573892, "kl": 0.00243377685546875, "learning_rate": 8.939999999999999e-07, "loss": 0.0011, "num_tokens": 19499324.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8448344469070435, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1561754058795112, "rewards/wordcountpos_reward/raw_geo/std": 0.09686430726169433, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1077.5625, "completions/mean_terminated_length": 1077.5625, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.08981796359271854, "frac_reward_zero_std": 0.0, "grad_norm": 2.7248304281811837, "kl": 0.0017337799072265625, "learning_rate": 8.96e-07, "loss": 0.0125, "num_tokens": 19548149.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0590662956237793, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.027372537861726533, "rewards/wordcountpos_reward/raw_geo/std": 0.11804690267772838, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14782371884055637, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1274.0, "completions/mean_terminated_length": 1258.933349609375, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.09001800360072014, "frac_reward_zero_std": 0.0, "grad_norm": 3.318786933750916, "kl": 0.0032501220703125, "learning_rate": 8.98e-07, "loss": 0.0373, "num_tokens": 19596029.0, "reward": 7.450580596923828e-09, "reward_std": 1.0158376693725586, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11766312978343946, "rewards/wordcountpos_reward/raw_geo/std": 0.09973759725612268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382576, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1108.375, "completions/mean_terminated_length": 1108.375, "completions/min_length": 718.0, "completions/min_terminated_length": 718.0, "epoch": 0.09021804360872174, "frac_reward_zero_std": 0.0, "grad_norm": 3.0416041909251246, "kl": 0.0021419525146484375, "learning_rate": 9e-07, "loss": -0.052, "num_tokens": 19647387.0, "reward": -1.4901161193847656e-08, "reward_std": 0.946031928062439, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02360611013471328, "rewards/wordcountpos_reward/raw_geo/std": 0.11839692479058546, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952505, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1105.625, "completions/mean_terminated_length": 1105.625, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.09041808361672335, "frac_reward_zero_std": 0.0, "grad_norm": 3.36723731552191, "kl": 0.002902984619140625, "learning_rate": 9.02e-07, "loss": -0.0397, "num_tokens": 19698469.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5664991140365601, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019582839167614552, "rewards/wordcountpos_reward/raw_geo/std": 0.14939457389724295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 1025.9375, "completions/mean_terminated_length": 994.3333740234375, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.09061812362472495, "frac_reward_zero_std": 0.0, "grad_norm": 3.7303988369400694, "kl": 0.00295257568359375, "learning_rate": 9.039999999999999e-07, "loss": -0.0383, "num_tokens": 19736444.0, "reward": -7.450580596923828e-09, "reward_std": 1.0268616676330566, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.010483479188954837, "rewards/wordcountpos_reward/raw_geo/std": 0.04943226496645019, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.18252346373772008, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1000.5625, "completions/mean_terminated_length": 929.21435546875, "completions/min_length": 556.0, "completions/min_terminated_length": 556.0, "epoch": 0.09081816363272655, "frac_reward_zero_std": 0.0, "grad_norm": 3.157173861491531, "kl": 0.0021572113037109375, "learning_rate": 9.06e-07, "loss": -0.0327, "num_tokens": 19791541.0, "reward": 0.0, "reward_std": 0.9003146886825562, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04191839017513677, "rewards/wordcountpos_reward/raw_geo/std": 0.10478729870999097, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 1097.0625, "completions/mean_terminated_length": 1070.2000732421875, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.09101820364072814, "frac_reward_zero_std": 0.0, "grad_norm": 3.171057841397273, "kl": 0.0021343231201171875, "learning_rate": 9.08e-07, "loss": -0.0449, "num_tokens": 19830086.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5525245666503906, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09104565016963011, "rewards/wordcountpos_reward/raw_geo/std": 0.088891372838374, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1045.875, "completions/mean_terminated_length": 1045.875, "completions/min_length": 724.0, "completions/min_terminated_length": 724.0, "epoch": 0.09121824364872974, "frac_reward_zero_std": 0.0, "grad_norm": 3.580149128499416, "kl": 0.003387451171875, "learning_rate": 9.1e-07, "loss": -0.0217, "num_tokens": 19874276.0, "reward": 0.0, "reward_std": 0.4022776782512665, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16431213167510778, "rewards/wordcountpos_reward/raw_geo/std": 0.40338450210035404, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1062.4375, "completions/mean_terminated_length": 1033.2667236328125, "completions/min_length": 738.0, "completions/min_terminated_length": 738.0, "epoch": 0.09141828365673135, "frac_reward_zero_std": 0.0, "grad_norm": 2.6523201988007066, "kl": 0.0021839141845703125, "learning_rate": 9.12e-07, "loss": 0.044, "num_tokens": 19915979.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0247694253921509, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054765778931615196, "rewards/wordcountpos_reward/raw_geo/std": 0.10303805703992713, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 985.1875, "completions/mean_terminated_length": 950.86669921875, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.09161832366473295, "frac_reward_zero_std": 0.0, "grad_norm": 2.9604431023636972, "kl": 0.001804351806640625, "learning_rate": 9.14e-07, "loss": 0.0146, "num_tokens": 19949886.0, "reward": 0.0, "reward_std": 0.9891246557235718, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014044014781523294, "rewards/wordcountpos_reward/raw_geo/std": 0.162384230061039, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1278.0, "completions/max_terminated_length": 1278.0, "completions/mean_length": 868.25, "completions/mean_terminated_length": 868.25, "completions/min_length": 519.0, "completions/min_terminated_length": 519.0, "epoch": 0.09181836367273455, "frac_reward_zero_std": 0.0, "grad_norm": 3.3286787778973164, "kl": 0.002689361572265625, "learning_rate": 9.16e-07, "loss": -0.0033, "num_tokens": 19988010.0, "reward": -5.960464477539063e-08, "reward_std": 0.8544387817382812, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10540768075397114, "rewards/wordcountpos_reward/raw_geo/std": 0.17074815818788974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1314.0625, "completions/mean_terminated_length": 1229.5455322265625, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.09201840368073615, "frac_reward_zero_std": 0.0, "grad_norm": 1.683631270482807, "kl": 0.0011749267578125, "learning_rate": 9.18e-07, "loss": 0.0296, "num_tokens": 20030211.0, "reward": 0.0, "reward_std": 0.6370956897735596, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2826937558590972, "rewards/wordcountpos_reward/raw_geo/std": 0.3317353558341128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1218.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 941.125, "completions/mean_terminated_length": 941.125, "completions/min_length": 195.0, "completions/min_terminated_length": 195.0, "epoch": 0.09221844368873774, "frac_reward_zero_std": 0.0, "grad_norm": 3.5927098932667105, "kl": 0.0031414031982421875, "learning_rate": 9.2e-07, "loss": -0.0582, "num_tokens": 20084397.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7087376117706299, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16634700842841904, "rewards/wordcountpos_reward/raw_geo/std": 0.08169764145115765, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1189459883650901, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1282.3125, "completions/mean_terminated_length": 1209.75, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.09241848369673934, "frac_reward_zero_std": 0.0, "grad_norm": 2.9637117047520802, "kl": 0.0028533935546875, "learning_rate": 9.22e-07, "loss": -0.0477, "num_tokens": 20135162.0, "reward": 0.0, "reward_std": 0.9201784133911133, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04087278107322107, "rewards/wordcountpos_reward/raw_geo/std": 0.09916782732043211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729777, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1166.0, "completions/mean_terminated_length": 1014.1818237304688, "completions/min_length": 658.0, "completions/min_terminated_length": 658.0, "epoch": 0.09261852370474095, "frac_reward_zero_std": 0.0, "grad_norm": 3.4496587125355274, "kl": 0.00307464599609375, "learning_rate": 9.24e-07, "loss": 0.0141, "num_tokens": 20186546.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0554344654083252, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06641499834745053, "rewards/wordcountpos_reward/raw_geo/std": 0.06972501744383654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 1037.0, "completions/mean_terminated_length": 1037.0, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.09281856371274255, "frac_reward_zero_std": 0.0, "grad_norm": 3.4791752076316773, "kl": 0.003498077392578125, "learning_rate": 9.26e-07, "loss": -0.0024, "num_tokens": 20227266.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0228264331817627, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05289641691077268, "rewards/wordcountpos_reward/raw_geo/std": 0.06736946918752376, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1216.5, "completions/mean_terminated_length": 1197.60009765625, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.09301860372074415, "frac_reward_zero_std": 0.0, "grad_norm": 2.5043722242181445, "kl": 0.0016031265258789062, "learning_rate": 9.28e-07, "loss": 0.0057, "num_tokens": 20271642.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9817145466804504, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12695525480440295, "rewards/wordcountpos_reward/raw_geo/std": 0.06893485379345207, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06070572613176771, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 929.0, "completions/max_terminated_length": 929.0, "completions/mean_length": 794.8125, "completions/mean_terminated_length": 794.8125, "completions/min_length": 545.0, "completions/min_terminated_length": 545.0, "epoch": 0.09321864372874575, "frac_reward_zero_std": 0.0, "grad_norm": 4.16050644224258, "kl": 0.003055572509765625, "learning_rate": 9.3e-07, "loss": 0.0357, "num_tokens": 20311055.0, "reward": -7.450580596923828e-09, "reward_std": 1.0447560548782349, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0017178301668119714, "rewards/wordcountpos_reward/raw_geo/std": 0.006871320667247886, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1167460047694551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 1051.3125, "completions/mean_terminated_length": 1021.4000244140625, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.09341868373674735, "frac_reward_zero_std": 0.0, "grad_norm": 3.2870134690019093, "kl": 0.002498626708984375, "learning_rate": 9.32e-07, "loss": 0.0004, "num_tokens": 20349572.0, "reward": 0.0, "reward_std": 0.7005062699317932, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.011948765149479162, "rewards/wordcountpos_reward/raw_geo/std": 0.04819504353936146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.725, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1295.0, "completions/mean_length": 1018.875, "completions/mean_terminated_length": 986.800048828125, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.09361872374474894, "frac_reward_zero_std": 0.0, "grad_norm": 3.121983313684933, "kl": 0.002147674560546875, "learning_rate": 9.34e-07, "loss": 0.0256, "num_tokens": 20391170.0, "reward": 0.0, "reward_std": 1.006962776184082, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014171749608429755, "rewards/wordcountpos_reward/raw_geo/std": 0.08439567214228119, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1088662107903635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1192.4375, "completions/mean_terminated_length": 1192.4375, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.09381876375275056, "frac_reward_zero_std": 0.0, "grad_norm": 2.684943131551981, "kl": 0.0020389556884765625, "learning_rate": 9.36e-07, "loss": -0.0193, "num_tokens": 20424025.0, "reward": 0.0, "reward_std": 0.4325961470603943, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027337552314660955, "rewards/wordcountpos_reward/raw_geo/std": 0.07482321122107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1236.1875, "completions/mean_terminated_length": 1236.1875, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.09401880376075215, "frac_reward_zero_std": 0.0, "grad_norm": 2.095362318853056, "kl": 0.0007042884826660156, "learning_rate": 9.379999999999998e-07, "loss": 0.0008, "num_tokens": 20466852.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9703868627548218, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09096523782005494, "rewards/wordcountpos_reward/raw_geo/std": 0.1597409795726704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1198.5, "completions/mean_terminated_length": 1178.4000244140625, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.09421884376875375, "frac_reward_zero_std": 0.0, "grad_norm": 2.4935716459119375, "kl": 0.002117156982421875, "learning_rate": 9.399999999999999e-07, "loss": -0.0348, "num_tokens": 20514764.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8660947680473328, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04001216727402988, "rewards/wordcountpos_reward/raw_geo/std": 0.10160015094173779, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1173.5625, "completions/mean_terminated_length": 1173.5625, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.09441888377675535, "frac_reward_zero_std": 0.0, "grad_norm": 2.5935614888674263, "kl": 0.0017590522766113281, "learning_rate": 9.419999999999999e-07, "loss": 0.0054, "num_tokens": 20557757.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9719048738479614, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06503271457099888, "rewards/wordcountpos_reward/raw_geo/std": 0.14974833025692413, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 983.5, "completions/mean_terminated_length": 983.5, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.09461892378475695, "frac_reward_zero_std": 0.0, "grad_norm": 3.3986497450490534, "kl": 0.002597808837890625, "learning_rate": 9.439999999999999e-07, "loss": 0.0448, "num_tokens": 20606269.0, "reward": 0.0, "reward_std": 0.4908173084259033, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05732191105705363, "rewards/wordcountpos_reward/raw_geo/std": 0.14266987840195366, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1096.8125, "completions/mean_terminated_length": 1096.8125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.09481896379275855, "frac_reward_zero_std": 0.0, "grad_norm": 3.0694386003965044, "kl": 0.0021696090698242188, "learning_rate": 9.459999999999999e-07, "loss": -0.0266, "num_tokens": 20652386.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9674227237701416, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15948392931483177, "rewards/wordcountpos_reward/raw_geo/std": 0.14433339806232215, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 932.625, "completions/mean_terminated_length": 932.625, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.09501900380076016, "frac_reward_zero_std": 0.0, "grad_norm": 3.2306023985529873, "kl": 0.001682281494140625, "learning_rate": 9.479999999999999e-07, "loss": -0.052, "num_tokens": 20693556.0, "reward": 0.0, "reward_std": 0.8287612795829773, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05784487664166692, "rewards/wordcountpos_reward/raw_geo/std": 0.1167221257573027, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390615, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1066.125, "completions/mean_terminated_length": 1066.125, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.09521904380876176, "frac_reward_zero_std": 0.0, "grad_norm": 1.3882257807466178, "kl": 0.0004857778549194336, "learning_rate": 9.499999999999999e-07, "loss": -0.0003, "num_tokens": 20742846.0, "reward": 0.0, "reward_std": 0.9937007427215576, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.028478928645724357, "rewards/wordcountpos_reward/raw_geo/std": 0.0800622126073824, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185828, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1134.1875, "completions/mean_terminated_length": 1134.1875, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.09541908381676335, "frac_reward_zero_std": 0.0, "grad_norm": 3.4461237601519192, "kl": 0.003337860107421875, "learning_rate": 9.52e-07, "loss": -0.0145, "num_tokens": 20780905.0, "reward": 0.0, "reward_std": 1.0549769401550293, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0020607187762826987, "rewards/wordcountpos_reward/raw_geo/std": 0.09353046792228852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1307.5, "completions/mean_terminated_length": 1307.5, "completions/min_length": 1091.0, "completions/min_terminated_length": 1091.0, "epoch": 0.09561912382476495, "frac_reward_zero_std": 0.0, "grad_norm": 2.610722762470047, "kl": 0.00225067138671875, "learning_rate": 9.539999999999999e-07, "loss": -0.0187, "num_tokens": 20839105.0, "reward": 0.0, "reward_std": 0.759341835975647, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.029143578409760205, "rewards/wordcountpos_reward/raw_geo/std": 0.05348086018287396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1413.6875, "completions/mean_terminated_length": 1327.375, "completions/min_length": 1168.0, "completions/min_terminated_length": 1168.0, "epoch": 0.09581916383276655, "frac_reward_zero_std": 0.0, "grad_norm": 1.4588838362600443, "kl": 0.0006351470947265625, "learning_rate": 9.559999999999998e-07, "loss": -0.0178, "num_tokens": 20875980.0, "reward": 0.0, "reward_std": 0.9503426551818848, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1072165083130928, "rewards/wordcountpos_reward/raw_geo/std": 0.13808855888773275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 840.8125, "completions/mean_terminated_length": 796.86669921875, "completions/min_length": 409.0, "completions/min_terminated_length": 409.0, "epoch": 0.09601920384076815, "frac_reward_zero_std": 0.0, "grad_norm": 2.7926570468671876, "kl": 0.0018596649169921875, "learning_rate": 9.58e-07, "loss": -0.0747, "num_tokens": 20905481.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7571004629135132, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0034355670983920433, "rewards/wordcountpos_reward/raw_geo/std": 0.1322509009792835, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1219.6875, "completions/mean_terminated_length": 1179.6429443359375, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.09621924384876976, "frac_reward_zero_std": 0.0, "grad_norm": 2.9461889410208433, "kl": 0.002044677734375, "learning_rate": 9.6e-07, "loss": 0.0471, "num_tokens": 20944196.0, "reward": -2.9802322387695312e-08, "reward_std": 0.776721715927124, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009757609977105022, "rewards/wordcountpos_reward/raw_geo/std": 0.08117345549545481, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1258.625, "completions/mean_terminated_length": 1258.625, "completions/min_length": 1070.0, "completions/min_terminated_length": 1070.0, "epoch": 0.09641928385677136, "frac_reward_zero_std": 0.0, "grad_norm": 2.4707941232157884, "kl": 0.0020122528076171875, "learning_rate": 9.619999999999999e-07, "loss": 0.0098, "num_tokens": 20981870.0, "reward": 0.0, "reward_std": 1.058112382888794, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0910334419908491, "rewards/wordcountpos_reward/raw_geo/std": 0.048551581882787916, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1013.5625, "completions/mean_terminated_length": 1013.5625, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.09661932386477295, "frac_reward_zero_std": 0.0, "grad_norm": 2.770317252624303, "kl": 0.0017871856689453125, "learning_rate": 9.64e-07, "loss": 0.0096, "num_tokens": 21023535.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0308070182800293, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11234302193644059, "rewards/wordcountpos_reward/raw_geo/std": 0.2621016406184735, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 918.25, "completions/mean_terminated_length": 918.25, "completions/min_length": 640.0, "completions/min_terminated_length": 640.0, "epoch": 0.09681936387277455, "frac_reward_zero_std": 0.0, "grad_norm": 3.847537337325355, "kl": 0.003387451171875, "learning_rate": 9.66e-07, "loss": -0.0322, "num_tokens": 21060539.0, "reward": 0.0, "reward_std": 0.5896586179733276, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06407387064858733, "rewards/wordcountpos_reward/raw_geo/std": 0.13089185255501323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1138.3125, "completions/mean_terminated_length": 1138.3125, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.09701940388077615, "frac_reward_zero_std": 0.0, "grad_norm": 2.1138505375212415, "kl": 0.0018100738525390625, "learning_rate": 9.679999999999999e-07, "loss": -0.0142, "num_tokens": 21104968.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9472931623458862, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07957419468080931, "rewards/wordcountpos_reward/raw_geo/std": 0.08949099042844877, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1338185615204685, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1156.3125, "completions/mean_terminated_length": 1133.4000244140625, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.09721944388877776, "frac_reward_zero_std": 0.0, "grad_norm": 3.163610426942226, "kl": 0.003108978271484375, "learning_rate": 9.7e-07, "loss": 0.0222, "num_tokens": 21148709.0, "reward": 0.0, "reward_std": 0.8028128147125244, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09171651298782155, "rewards/wordcountpos_reward/raw_geo/std": 0.27495332105799164, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081414, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1184.875, "completions/mean_terminated_length": 1184.875, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.09741948389677936, "frac_reward_zero_std": 0.0, "grad_norm": 3.440813258319072, "kl": 0.002994537353515625, "learning_rate": 9.72e-07, "loss": 0.0383, "num_tokens": 21200035.0, "reward": 0.0, "reward_std": 0.838242769241333, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08261366674540484, "rewards/wordcountpos_reward/raw_geo/std": 0.07967757495576122, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.25265259415516267, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1141.5, "completions/mean_terminated_length": 1141.5, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.09761952390478096, "frac_reward_zero_std": 0.0, "grad_norm": 2.6848111587225514, "kl": 0.0020599365234375, "learning_rate": 9.74e-07, "loss": -0.0204, "num_tokens": 21234211.0, "reward": 0.0, "reward_std": 0.5016034841537476, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01833172982551814, "rewards/wordcountpos_reward/raw_geo/std": 0.13815844082096307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 999.375, "completions/mean_terminated_length": 999.375, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.09781956391278256, "frac_reward_zero_std": 0.0, "grad_norm": 3.8610230532854484, "kl": 0.003719329833984375, "learning_rate": 9.759999999999998e-07, "loss": 0.0163, "num_tokens": 21277873.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5087968111038208, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.029772728156827268, "rewards/wordcountpos_reward/raw_geo/std": 0.14740463180699628, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1500617156989701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1359.3125, "completions/mean_terminated_length": 1326.84619140625, "completions/min_length": 1168.0, "completions/min_terminated_length": 1168.0, "epoch": 0.09801960392078415, "frac_reward_zero_std": 0.0, "grad_norm": 2.7083956629899415, "kl": 0.002582550048828125, "learning_rate": 9.78e-07, "loss": 0.0157, "num_tokens": 21330934.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9627735614776611, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11160320443813755, "rewards/wordcountpos_reward/raw_geo/std": 0.11353641005806972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262934, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1200.3125, "completions/mean_terminated_length": 1157.5, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.09821964392878575, "frac_reward_zero_std": 0.0, "grad_norm": 2.876192888327194, "kl": 0.002735137939453125, "learning_rate": 9.8e-07, "loss": -0.0199, "num_tokens": 21386075.0, "reward": -7.450580596923828e-09, "reward_std": 1.0218180418014526, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10738691415552819, "rewards/wordcountpos_reward/raw_geo/std": 0.06044878387629908, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward/raw_rule/std": 0.1796601730428249, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1120.625, "completions/mean_terminated_length": 1120.625, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.09841968393678736, "frac_reward_zero_std": 0.0, "grad_norm": 2.532748681692034, "kl": 0.002788543701171875, "learning_rate": 9.819999999999999e-07, "loss": -0.0129, "num_tokens": 21421573.0, "reward": 0.0, "reward_std": 0.8296001553535461, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.028184053989471766, "rewards/wordcountpos_reward/raw_geo/std": 0.10575152905320613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10671873729054748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 993.625, "completions/mean_terminated_length": 993.625, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.09861972394478896, "frac_reward_zero_std": 0.0, "grad_norm": 3.7098073803652145, "kl": 0.003170013427734375, "learning_rate": 9.84e-07, "loss": -0.026, "num_tokens": 21459607.0, "reward": 7.450580596923828e-09, "reward_std": 1.065168857574463, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.014650490817114411, "rewards/wordcountpos_reward/raw_geo/std": 0.07698630368220064, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0596284793999944, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1193.3125, "completions/mean_terminated_length": 1091.0833740234375, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.09881976395279056, "frac_reward_zero_std": 0.0, "grad_norm": 3.351053765361233, "kl": 0.003322601318359375, "learning_rate": 9.86e-07, "loss": -0.0419, "num_tokens": 21499092.0, "reward": 2.9802322387695312e-08, "reward_std": 0.576299786567688, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09818906152562937, "rewards/wordcountpos_reward/raw_geo/std": 0.11096192058241681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1049.0, "completions/max_terminated_length": 1049.0, "completions/mean_length": 991.3125, "completions/mean_terminated_length": 991.3125, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.09901980396079216, "frac_reward_zero_std": 0.0, "grad_norm": 2.7093974238990075, "kl": 0.0017337799072265625, "learning_rate": 9.88e-07, "loss": 0.0047, "num_tokens": 21533809.0, "reward": -2.9802322387695312e-08, "reward_std": 0.914000391960144, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15058066876217352, "rewards/wordcountpos_reward/raw_geo/std": 0.05065428516809082, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1337.0625, "completions/mean_terminated_length": 1239.300048828125, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.09921984396879376, "frac_reward_zero_std": 0.0, "grad_norm": 3.305958590103276, "kl": 0.00327301025390625, "learning_rate": 9.9e-07, "loss": 0.0512, "num_tokens": 21573442.0, "reward": 0.0, "reward_std": 0.8479118347167969, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17085014681717073, "rewards/wordcountpos_reward/raw_geo/std": 0.23646436101757687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1401.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1079.5625, "completions/mean_terminated_length": 1079.5625, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.09941988397679535, "frac_reward_zero_std": 0.0, "grad_norm": 2.237181195884694, "kl": 0.0020618438720703125, "learning_rate": 9.92e-07, "loss": -0.0268, "num_tokens": 21605995.0, "reward": 0.0, "reward_std": 0.7448483109474182, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.054934752271677906, "rewards/wordcountpos_reward/raw_geo/std": 0.14111626439720662, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05900408021045224, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1188.1875, "completions/mean_terminated_length": 1143.6429443359375, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.09961992398479697, "frac_reward_zero_std": 0.0, "grad_norm": 2.4564196381984553, "kl": 0.0016880035400390625, "learning_rate": 9.94e-07, "loss": -0.036, "num_tokens": 21658334.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6954265832901001, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15044035077604337, "rewards/wordcountpos_reward/raw_geo/std": 0.12864148154055016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1115.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 985.375, "completions/mean_terminated_length": 985.375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.09981996399279856, "frac_reward_zero_std": 0.0, "grad_norm": 3.2024178687062435, "kl": 0.002910614013671875, "learning_rate": 9.959999999999999e-07, "loss": 0.0009, "num_tokens": 21690084.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0436984300613403, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.053173208857816914, "rewards/wordcountpos_reward/raw_geo/std": 0.059335139890900584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1138.0, "completions/max_terminated_length": 1138.0, "completions/mean_length": 860.125, "completions/mean_terminated_length": 860.125, "completions/min_length": 613.0, "completions/min_terminated_length": 613.0, "epoch": 0.10002000400080016, "frac_reward_zero_std": 0.0, "grad_norm": 3.589445308970012, "kl": 0.003017425537109375, "learning_rate": 9.98e-07, "loss": 0.0084, "num_tokens": 21719582.0, "reward": 0.0, "reward_std": 0.6890698671340942, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14452684010703898, "rewards/wordcountpos_reward/raw_geo/std": 0.14872745340920018, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1170.1875, "completions/mean_terminated_length": 1148.2000732421875, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.10022004400880176, "frac_reward_zero_std": 0.0, "grad_norm": 3.3503278285045908, "kl": 0.003173828125, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 21754897.0, "reward": 7.450580596923828e-09, "reward_std": 1.056408166885376, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.05163033368753471, "rewards/wordcountpos_reward/raw_geo/std": 0.04741391081471719, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1179.0, "completions/max_terminated_length": 1179.0, "completions/mean_length": 967.4375, "completions/mean_terminated_length": 967.4375, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.10042008401680336, "frac_reward_zero_std": 0.0, "grad_norm": 3.3992872991476126, "kl": 0.00319671630859375, "learning_rate": 9.999998902889782e-07, "loss": -0.007, "num_tokens": 21793880.0, "reward": 0.0, "reward_std": 0.5806048512458801, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027409740286549706, "rewards/wordcountpos_reward/raw_geo/std": 0.15258789571654874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1107.375, "completions/mean_terminated_length": 1107.375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.10062012402480495, "frac_reward_zero_std": 0.0, "grad_norm": 3.468943403079575, "kl": 0.003437042236328125, "learning_rate": 9.999995611559667e-07, "loss": 0.0029, "num_tokens": 21834766.0, "reward": 0.0, "reward_std": 0.9914134740829468, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1520949058432, "rewards/wordcountpos_reward/raw_geo/std": 0.21153564619625878, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1131.1875, "completions/mean_terminated_length": 1131.1875, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.10082016403280657, "frac_reward_zero_std": 0.0, "grad_norm": 3.4549201260688407, "kl": 0.00376129150390625, "learning_rate": 9.999990126011257e-07, "loss": 0.0037, "num_tokens": 21879961.0, "reward": 0.0, "reward_std": 0.8015230298042297, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03451011290989368, "rewards/wordcountpos_reward/raw_geo/std": 0.0662209835672636, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988657, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1166.25, "completions/mean_terminated_length": 1014.5454711914062, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.10102020404080816, "frac_reward_zero_std": 0.0, "grad_norm": 3.0191254994649896, "kl": 0.003025054931640625, "learning_rate": 9.999982446247225e-07, "loss": 0.0079, "num_tokens": 21928437.0, "reward": 0.0, "reward_std": 0.8550890684127808, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11181711842126814, "rewards/wordcountpos_reward/raw_geo/std": 0.05753556447331552, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14240006242195885, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 994.0, "completions/max_terminated_length": 994.0, "completions/mean_length": 828.875, "completions/mean_terminated_length": 828.875, "completions/min_length": 667.0, "completions/min_terminated_length": 667.0, "epoch": 0.10122024404880976, "frac_reward_zero_std": 0.0, "grad_norm": 3.6007722290881596, "kl": 0.0030851364135742188, "learning_rate": 9.999972572271322e-07, "loss": -0.0071, "num_tokens": 21967739.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0402424335479736, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20852460988398958, "rewards/wordcountpos_reward/raw_geo/std": 0.17636089333781668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12816366850994057, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1274.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 1080.8125, "completions/mean_terminated_length": 1080.8125, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.10142028405681136, "frac_reward_zero_std": 0.0, "grad_norm": 2.58876308164753, "kl": 0.00179290771484375, "learning_rate": 9.999960504088355e-07, "loss": 0.004, "num_tokens": 22016000.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9667521715164185, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10357312940039556, "rewards/wordcountpos_reward/raw_geo/std": 0.14473622461103544, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1175.4375, "completions/mean_terminated_length": 1129.071533203125, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.10162032406481296, "frac_reward_zero_std": 0.0, "grad_norm": 3.1394805052302046, "kl": 0.0032958984375, "learning_rate": 9.999946241704217e-07, "loss": 0.0209, "num_tokens": 22065887.0, "reward": -9.313225746154785e-09, "reward_std": 1.0673675537109375, "rewards/wordcountpos_reward/mean": -9.313225746154785e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.14287042195619049, "rewards/wordcountpos_reward/raw_geo/std": 0.2783586077613992, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1299.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 923.8125, "completions/mean_terminated_length": 923.8125, "completions/min_length": 475.0, "completions/min_terminated_length": 475.0, "epoch": 0.10182036407281456, "frac_reward_zero_std": 0.0, "grad_norm": 3.6332851176440375, "kl": 0.002704620361328125, "learning_rate": 9.999929785125855e-07, "loss": -0.0539, "num_tokens": 22100532.0, "reward": -7.450580596923828e-09, "reward_std": 1.0480988025665283, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.05402005420071991, "rewards/wordcountpos_reward/raw_geo/std": 0.0819726518515701, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.161245154965971, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1111.375, "completions/mean_terminated_length": 1085.4666748046875, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.10202040408081617, "frac_reward_zero_std": 0.0, "grad_norm": 3.6946424669767075, "kl": 0.00368499755859375, "learning_rate": 9.999911134361297e-07, "loss": -0.0215, "num_tokens": 22150458.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9156558513641357, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.28532424094059317, "rewards/wordcountpos_reward/raw_geo/std": 0.3058888392113991, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1370.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1046.3125, "completions/mean_terminated_length": 1046.3125, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.10222044408881777, "frac_reward_zero_std": 0.0, "grad_norm": 3.9778437702769005, "kl": 0.003681182861328125, "learning_rate": 9.999890289419633e-07, "loss": -0.0145, "num_tokens": 22193959.0, "reward": -7.450580596923828e-09, "reward_std": 1.0465683937072754, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.09833448892075872, "rewards/wordcountpos_reward/raw_geo/std": 0.06227146483146065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1458055529095489, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1077.1875, "completions/mean_terminated_length": 1077.1875, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.10242048409681936, "frac_reward_zero_std": 0.0, "grad_norm": 3.9496288190588325, "kl": 0.00339508056640625, "learning_rate": 9.999867250311034e-07, "loss": 0.0014, "num_tokens": 22242090.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6210155487060547, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.26405872125492136, "rewards/wordcountpos_reward/raw_geo/std": 0.19703978584717102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.23597708614436527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1138.875, "completions/mean_terminated_length": 1138.875, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.10262052410482096, "frac_reward_zero_std": 0.0, "grad_norm": 2.34170030045483, "kl": 0.001407623291015625, "learning_rate": 9.999842017046729e-07, "loss": -0.0146, "num_tokens": 22289056.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9713587760925293, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.061435437395757596, "rewards/wordcountpos_reward/raw_geo/std": 0.09058750077536384, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 1064.0, "completions/mean_terminated_length": 1001.71435546875, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.10282056411282256, "frac_reward_zero_std": 0.0, "grad_norm": 3.5959829750472783, "kl": 0.0032806396484375, "learning_rate": 9.999814589639024e-07, "loss": 0.0096, "num_tokens": 22340968.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0442018508911133, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1458997781674531, "rewards/wordcountpos_reward/raw_geo/std": 0.11164110361722793, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.21460558137093164, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1047.875, "completions/mean_terminated_length": 1047.875, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.10302060412082416, "frac_reward_zero_std": 0.0, "grad_norm": 3.4190556889124637, "kl": 0.002979278564453125, "learning_rate": 9.99978496810129e-07, "loss": -0.0622, "num_tokens": 22386678.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9543663263320923, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05787473445859307, "rewards/wordcountpos_reward/raw_geo/std": 0.06255903084043969, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1060.0625, "completions/mean_terminated_length": 1060.0625, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.10322064412882577, "frac_reward_zero_std": 0.0, "grad_norm": 2.77388045443082, "kl": 0.002544403076171875, "learning_rate": 9.999753152447975e-07, "loss": 0.0194, "num_tokens": 22423103.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8948169946670532, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009287352290140091, "rewards/wordcountpos_reward/raw_geo/std": 0.11516730967135275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1403.3125, "completions/mean_terminated_length": 1279.0, "completions/min_length": 1157.0, "completions/min_terminated_length": 1157.0, "epoch": 0.10342068413682737, "frac_reward_zero_std": 0.0, "grad_norm": 2.5565780589489937, "kl": 0.0024662017822265625, "learning_rate": 9.999719142694587e-07, "loss": 0.0128, "num_tokens": 22472068.0, "reward": 0.0, "reward_std": 0.7774474620819092, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.013580617634790352, "rewards/wordcountpos_reward/raw_geo/std": 0.04766143536972826, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 934.0625, "completions/mean_terminated_length": 676.8181762695312, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.10362072414482897, "frac_reward_zero_std": 0.0, "grad_norm": 3.537932239386057, "kl": 0.002445220947265625, "learning_rate": 9.999682938857714e-07, "loss": -0.2467, "num_tokens": 22506293.0, "reward": 2.9802322387695312e-08, "reward_std": 0.20520628988742828, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04070586292796668, "rewards/wordcountpos_reward/raw_geo/std": 0.044918410903867535, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.2467192136222276, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1108.0, "completions/max_terminated_length": 1108.0, "completions/mean_length": 940.0625, "completions/mean_terminated_length": 940.0625, "completions/min_length": 563.0, "completions/min_terminated_length": 563.0, "epoch": 0.10382076415283056, "frac_reward_zero_std": 0.0, "grad_norm": 3.599234499546969, "kl": 0.0029621124267578125, "learning_rate": 9.999644540955006e-07, "loss": 0.0503, "num_tokens": 22545630.0, "reward": 0.0, "reward_std": 0.6917240619659424, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10104033237679752, "rewards/wordcountpos_reward/raw_geo/std": 0.12197133913767746, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.16238956361284543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1234.0, "completions/max_terminated_length": 1234.0, "completions/mean_length": 1009.75, "completions/mean_terminated_length": 1009.75, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.10402080416083216, "frac_reward_zero_std": 0.0, "grad_norm": 3.441418261607082, "kl": 0.00318145751953125, "learning_rate": 9.99960394900519e-07, "loss": 0.0482, "num_tokens": 22585258.0, "reward": 0.0, "reward_std": 1.0492933988571167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0460107454620935, "rewards/wordcountpos_reward/raw_geo/std": 0.08028027634463461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1207.0, "completions/mean_length": 1124.6875, "completions/mean_terminated_length": 1099.666748046875, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.10422084416883377, "frac_reward_zero_std": 0.0, "grad_norm": 3.5554381204838372, "kl": 0.004390716552734375, "learning_rate": 9.999561163028054e-07, "loss": 0.0093, "num_tokens": 22626725.0, "reward": 7.450580596923828e-09, "reward_std": 1.0660068988800049, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.015194260349199755, "rewards/wordcountpos_reward/raw_geo/std": 0.09218172898444489, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1224.5625, "completions/mean_terminated_length": 1224.5625, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.10442088417683537, "frac_reward_zero_std": 0.0, "grad_norm": 3.3437539621392323, "kl": 0.00363922119140625, "learning_rate": 9.999516183044463e-07, "loss": -0.056, "num_tokens": 22675174.0, "reward": 5.960464477539063e-08, "reward_std": 1.0049105882644653, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0551602976857556, "rewards/wordcountpos_reward/raw_geo/std": 0.07104750172665891, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1153.0, "completions/max_terminated_length": 1153.0, "completions/mean_length": 999.5625, "completions/mean_terminated_length": 999.5625, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.10462092418483697, "frac_reward_zero_std": 0.0, "grad_norm": 3.4160380768407776, "kl": 0.00359344482421875, "learning_rate": 9.99946900907635e-07, "loss": 0.0147, "num_tokens": 22716367.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8914186954498291, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21583604101445772, "rewards/wordcountpos_reward/raw_geo/std": 0.1352627516273477, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1132.4375, "completions/mean_terminated_length": 1132.4375, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.10482096419283857, "frac_reward_zero_std": 0.0, "grad_norm": 3.0421652743310243, "kl": 0.00336456298828125, "learning_rate": 9.999419641146717e-07, "loss": -0.0232, "num_tokens": 22763950.0, "reward": 0.0, "reward_std": 0.7214622497558594, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1031496434668357, "rewards/wordcountpos_reward/raw_geo/std": 0.3003652912898537, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11729986896522632, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1162.4375, "completions/mean_terminated_length": 1139.933349609375, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.10502100420084017, "frac_reward_zero_std": 0.0, "grad_norm": 3.2985774537755397, "kl": 0.00292205810546875, "learning_rate": 9.999368079279633e-07, "loss": 0.0276, "num_tokens": 22804541.0, "reward": 0.0, "reward_std": 0.6324073076248169, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11208570193050091, "rewards/wordcountpos_reward/raw_geo/std": 0.10217915285146209, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 965.0, "completions/mean_terminated_length": 965.0, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.10522104420884176, "frac_reward_zero_std": 0.0, "grad_norm": 2.7334234126256525, "kl": 0.002666473388671875, "learning_rate": 9.999314323500245e-07, "loss": 0.0143, "num_tokens": 22847605.0, "reward": 1.4901161193847656e-08, "reward_std": 1.004686951637268, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15369471949204236, "rewards/wordcountpos_reward/raw_geo/std": 0.22092310022499412, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988657, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 933.9375, "completions/mean_terminated_length": 896.2000732421875, "completions/min_length": 502.0, "completions/min_terminated_length": 502.0, "epoch": 0.10542108421684337, "frac_reward_zero_std": 0.0, "grad_norm": 3.3299581380307455, "kl": 0.003543853759765625, "learning_rate": 9.99925837383476e-07, "loss": 0.0432, "num_tokens": 22878260.0, "reward": 0.0, "reward_std": 0.1896817535161972, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04412942377587502, "rewards/wordcountpos_reward/raw_geo/std": 0.04875373897272202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1865376538297196, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 896.125, "completions/mean_terminated_length": 896.125, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.10562112422484497, "frac_reward_zero_std": 0.0, "grad_norm": 2.9815122049620904, "kl": 0.002323150634765625, "learning_rate": 9.999200230310464e-07, "loss": -0.0269, "num_tokens": 22905574.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5550601482391357, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.020235067684602828, "rewards/wordcountpos_reward/raw_geo/std": 0.10951413785802963, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1204.0, "completions/mean_length": 1311.25, "completions/mean_terminated_length": 1122.5, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.10582116423284657, "frac_reward_zero_std": 0.0, "grad_norm": 2.8255191501162926, "kl": 0.0035400390625, "learning_rate": 9.999139892955702e-07, "loss": -0.001, "num_tokens": 22961714.0, "reward": 0.0, "reward_std": 0.493524432182312, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.134572415716046, "rewards/wordcountpos_reward/raw_geo/std": 0.11200140348467674, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503964, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1340.6875, "completions/mean_terminated_length": 1303.923095703125, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.10602120424084817, "frac_reward_zero_std": 0.0, "grad_norm": 2.6923523057866765, "kl": 0.00286102294921875, "learning_rate": 9.999077361799901e-07, "loss": 0.0378, "num_tokens": 23015805.0, "reward": 0.0, "reward_std": 0.8922038674354553, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006554401549943335, "rewards/wordcountpos_reward/raw_geo/std": 0.27449323057238395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1131.25, "completions/mean_terminated_length": 1078.571533203125, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.10622124424884977, "frac_reward_zero_std": 0.0, "grad_norm": 1.9113579049657163, "kl": 0.00177001953125, "learning_rate": 9.999012636873545e-07, "loss": -0.018, "num_tokens": 23054177.0, "reward": 2.9802322387695312e-08, "reward_std": 0.716476559638977, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08867874949732134, "rewards/wordcountpos_reward/raw_geo/std": 0.07211955195163484, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1230.4375, "completions/mean_terminated_length": 1212.4666748046875, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.10642128425685136, "frac_reward_zero_std": 0.0, "grad_norm": 2.3710401626716022, "kl": 0.00157928466796875, "learning_rate": 9.9989457182082e-07, "loss": -0.0389, "num_tokens": 23096224.0, "reward": 0.0, "reward_std": 0.4688728451728821, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.010246156670225156, "rewards/wordcountpos_reward/raw_geo/std": 0.12366147439349398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1369.5625, "completions/mean_terminated_length": 1310.2728271484375, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.10662132426485298, "frac_reward_zero_std": 0.0, "grad_norm": 2.1150195028880274, "kl": 0.00162506103515625, "learning_rate": 9.998876605836494e-07, "loss": 0.0159, "num_tokens": 23143209.0, "reward": -2.9802322387695312e-08, "reward_std": 0.637698769569397, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03878263204132484, "rewards/wordcountpos_reward/raw_geo/std": 0.06292999265928811, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1206.0, "completions/max_terminated_length": 1206.0, "completions/mean_length": 1029.0, "completions/mean_terminated_length": 1029.0, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.10682136427285457, "frac_reward_zero_std": 0.0, "grad_norm": 3.456448530381217, "kl": 0.003177642822265625, "learning_rate": 9.998805299792124e-07, "loss": -0.0137, "num_tokens": 23179833.0, "reward": 0.0, "reward_std": 0.8021977543830872, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22649387280945132, "rewards/wordcountpos_reward/raw_geo/std": 0.26097069279213403, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 1067.5, "completions/mean_terminated_length": 1067.5, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.10702140428085617, "frac_reward_zero_std": 0.0, "grad_norm": 3.2135730709328763, "kl": 0.0035858154296875, "learning_rate": 9.998731800109863e-07, "loss": -0.0313, "num_tokens": 23229617.0, "reward": 0.0, "reward_std": 0.8154876232147217, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07405866024457258, "rewards/wordcountpos_reward/raw_geo/std": 0.1353684457165506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1216.0, "completions/max_terminated_length": 1216.0, "completions/mean_length": 1030.0, "completions/mean_terminated_length": 1030.0, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.10722144428885777, "frac_reward_zero_std": 0.0, "grad_norm": 3.617657023267098, "kl": 0.0034942626953125, "learning_rate": 9.998656106825547e-07, "loss": -0.0524, "num_tokens": 23272657.0, "reward": 7.450580596923828e-09, "reward_std": 1.04160475730896, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03418250336862778, "rewards/wordcountpos_reward/raw_geo/std": 0.0721857084891372, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1266.6875, "completions/mean_terminated_length": 1033.375, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.10742148429685937, "frac_reward_zero_std": 0.0, "grad_norm": 3.3265678703648494, "kl": 0.003875732421875, "learning_rate": 9.998578219976087e-07, "loss": -0.0381, "num_tokens": 23318188.0, "reward": 7.450580596923828e-09, "reward_std": 0.9139026999473572, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07845589613207468, "rewards/wordcountpos_reward/raw_geo/std": 0.2843549319551082, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316813, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1095.1875, "completions/mean_terminated_length": 1095.1875, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.10762152430486097, "frac_reward_zero_std": 0.0, "grad_norm": 4.021521752726582, "kl": 0.004184722900390625, "learning_rate": 9.998498139599457e-07, "loss": 0.0017, "num_tokens": 23368439.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4519929885864258, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.018939090902597606, "rewards/wordcountpos_reward/raw_geo/std": 0.13436037245769455, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.141878925953186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1228.9375, "completions/mean_terminated_length": 1210.86669921875, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.10782156431286258, "frac_reward_zero_std": 0.0, "grad_norm": 2.9098989124376464, "kl": 0.002986907958984375, "learning_rate": 9.99841586573471e-07, "loss": -0.0232, "num_tokens": 23418878.0, "reward": 0.0, "reward_std": 0.836676836013794, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1268032575343032, "rewards/wordcountpos_reward/raw_geo/std": 0.13357303577055174, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16727666149669979, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1138.0625, "completions/mean_terminated_length": 1138.0625, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.10802160432086418, "frac_reward_zero_std": 0.0, "grad_norm": 3.6126260162838095, "kl": 0.0038909912109375, "learning_rate": 9.998331398421957e-07, "loss": 0.0191, "num_tokens": 23452279.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9278035759925842, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0030557194900407494, "rewards/wordcountpos_reward/raw_geo/std": 0.2449606092396285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437975, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1471.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1023.625, "completions/mean_terminated_length": 1023.625, "completions/min_length": 645.0, "completions/min_terminated_length": 645.0, "epoch": 0.10822164432886577, "frac_reward_zero_std": 0.0, "grad_norm": 3.672921574444718, "kl": 0.00301361083984375, "learning_rate": 9.99824473770239e-07, "loss": -0.0348, "num_tokens": 23483737.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8283048868179321, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.133992028642348, "rewards/wordcountpos_reward/raw_geo/std": 0.07518646880181934, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823633, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1137.3125, "completions/mean_terminated_length": 972.45458984375, "completions/min_length": 765.0, "completions/min_terminated_length": 765.0, "epoch": 0.10842168433686737, "frac_reward_zero_std": 0.0, "grad_norm": 3.4632358619334407, "kl": 0.004032135009765625, "learning_rate": 9.99815588361826e-07, "loss": -0.0083, "num_tokens": 23533718.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8786888718605042, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07120426404965362, "rewards/wordcountpos_reward/raw_geo/std": 0.09404858115499712, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 978.375, "completions/mean_terminated_length": 978.375, "completions/min_length": 724.0, "completions/min_terminated_length": 724.0, "epoch": 0.10862172434486897, "frac_reward_zero_std": 0.0, "grad_norm": 4.101729557257998, "kl": 0.003948211669921875, "learning_rate": 9.998064836212896e-07, "loss": -0.0058, "num_tokens": 23567228.0, "reward": 0.0, "reward_std": 0.9299734830856323, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.029188210369984507, "rewards/wordcountpos_reward/raw_geo/std": 0.06431088732227853, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1160.9375, "completions/mean_terminated_length": 1112.5, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.10882176435287057, "frac_reward_zero_std": 0.0, "grad_norm": 3.2615494952976345, "kl": 0.003200531005859375, "learning_rate": 9.997971595530694e-07, "loss": -0.0203, "num_tokens": 23607427.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8771929740905762, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10163403460258819, "rewards/wordcountpos_reward/raw_geo/std": 0.04956581953051861, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1119.0, "completions/max_terminated_length": 1119.0, "completions/mean_length": 876.0625, "completions/mean_terminated_length": 876.0625, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.10902180436087218, "frac_reward_zero_std": 0.0, "grad_norm": 3.256012659081334, "kl": 0.00231170654296875, "learning_rate": 9.997876161617116e-07, "loss": -0.0396, "num_tokens": 23656380.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9862924814224243, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07055651000374884, "rewards/wordcountpos_reward/raw_geo/std": 0.07463422099545439, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1052.0, "completions/mean_terminated_length": 1052.0, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.10922184436887378, "frac_reward_zero_std": 0.0, "grad_norm": 3.727847257725504, "kl": 0.00359344482421875, "learning_rate": 9.997778534518698e-07, "loss": 0.0104, "num_tokens": 23689996.0, "reward": 0.0, "reward_std": 0.9218430519104004, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10176819934330851, "rewards/wordcountpos_reward/raw_geo/std": 0.12292240999581669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1288.4375, "completions/mean_terminated_length": 1217.916748046875, "completions/min_length": 1048.0, "completions/min_terminated_length": 1048.0, "epoch": 0.10942188437687538, "frac_reward_zero_std": 0.0, "grad_norm": 2.963442401735094, "kl": 0.003566741943359375, "learning_rate": 9.99767871428304e-07, "loss": -0.0221, "num_tokens": 23742539.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0415704250335693, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0026725128439603184, "rewards/wordcountpos_reward/raw_geo/std": 0.14487264209992345, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 1064.375, "completions/mean_terminated_length": 1064.375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.10962192438487697, "frac_reward_zero_std": 0.0, "grad_norm": 2.339155250657867, "kl": 0.0017642974853515625, "learning_rate": 9.997576700958821e-07, "loss": -0.0058, "num_tokens": 23781257.0, "reward": -7.450580596923828e-09, "reward_std": 1.027381420135498, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.032476049463412024, "rewards/wordcountpos_reward/raw_geo/std": 0.08151936258716173, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 994.5625, "completions/mean_terminated_length": 994.5625, "completions/min_length": 505.0, "completions/min_terminated_length": 505.0, "epoch": 0.10982196439287857, "frac_reward_zero_std": 0.0, "grad_norm": 3.7747131511116776, "kl": 0.004329681396484375, "learning_rate": 9.99747249459578e-07, "loss": -0.0398, "num_tokens": 23815570.0, "reward": 0.0, "reward_std": 0.698041558265686, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08488631805000081, "rewards/wordcountpos_reward/raw_geo/std": 0.11848568211004275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13270686158262923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1128.1875, "completions/mean_terminated_length": 1128.1875, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.11002200440088018, "frac_reward_zero_std": 0.0, "grad_norm": 3.4943022272665933, "kl": 0.003734588623046875, "learning_rate": 9.997366095244725e-07, "loss": 0.001, "num_tokens": 23859165.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0013880729675293, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0929478241120112, "rewards/wordcountpos_reward/raw_geo/std": 0.1799033920581418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1095.0, "completions/mean_terminated_length": 1095.0, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.11022204440888178, "frac_reward_zero_std": 0.0, "grad_norm": 3.8221874833604272, "kl": 0.00399017333984375, "learning_rate": 9.997257502957542e-07, "loss": -0.0108, "num_tokens": 23896117.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0622494220733643, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011306161111533933, "rewards/wordcountpos_reward/raw_geo/std": 0.03490152234471482, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1190.3125, "completions/mean_terminated_length": 1146.071533203125, "completions/min_length": 806.0, "completions/min_terminated_length": 806.0, "epoch": 0.11042208441688338, "frac_reward_zero_std": 0.0, "grad_norm": 2.9491541845630147, "kl": 0.0034637451171875, "learning_rate": 9.997146717787177e-07, "loss": -0.0425, "num_tokens": 23938634.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9573972225189209, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0449865480633076, "rewards/wordcountpos_reward/raw_geo/std": 0.041069503586211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1262.125, "completions/mean_terminated_length": 1182.8333740234375, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.11062212442488498, "frac_reward_zero_std": 0.0, "grad_norm": 3.2322382550741486, "kl": 0.003429412841796875, "learning_rate": 9.997033739787652e-07, "loss": -0.0188, "num_tokens": 23981732.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6290683746337891, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11262019759432954, "rewards/wordcountpos_reward/raw_geo/std": 0.04860043192875778, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14801151106386087, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1157.5625, "completions/mean_terminated_length": 1157.5625, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.11082216443288657, "frac_reward_zero_std": 0.0, "grad_norm": 2.8116699739298796, "kl": 0.00273895263671875, "learning_rate": 9.996918569014055e-07, "loss": -0.0332, "num_tokens": 24015741.0, "reward": 0.0, "reward_std": 0.7823160886764526, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19674041412945933, "rewards/wordcountpos_reward/raw_geo/std": 0.1502511192862497, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 917.0, "completions/max_terminated_length": 917.0, "completions/mean_length": 788.5, "completions/mean_terminated_length": 788.5, "completions/min_length": 648.0, "completions/min_terminated_length": 648.0, "epoch": 0.11102220444088817, "frac_reward_zero_std": 0.0, "grad_norm": 3.285146713949514, "kl": 0.002384185791015625, "learning_rate": 9.996801205522545e-07, "loss": -0.024, "num_tokens": 24044933.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6573373079299927, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03580027288163957, "rewards/wordcountpos_reward/raw_geo/std": 0.10357984831541751, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1051.4375, "completions/mean_terminated_length": 1051.4375, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.11122224444888978, "frac_reward_zero_std": 0.0, "grad_norm": 2.943957123082875, "kl": 0.003726959228515625, "learning_rate": 9.996681649370347e-07, "loss": 0.0239, "num_tokens": 24085340.0, "reward": -5.960464477539063e-08, "reward_std": 0.7492737770080566, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13077286039206784, "rewards/wordcountpos_reward/raw_geo/std": 0.09374694514278689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1180.1875, "completions/mean_terminated_length": 1158.86669921875, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.11142228445689138, "frac_reward_zero_std": 0.0, "grad_norm": 2.683342362523262, "kl": 0.002689361572265625, "learning_rate": 9.996559900615756e-07, "loss": -0.0095, "num_tokens": 24134055.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8977107405662537, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08564673215540983, "rewards/wordcountpos_reward/raw_geo/std": 0.07659207324220883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 943.8125, "completions/mean_terminated_length": 943.8125, "completions/min_length": 657.0, "completions/min_terminated_length": 657.0, "epoch": 0.11162232446489298, "frac_reward_zero_std": 0.0, "grad_norm": 4.189765804538905, "kl": 0.0048675537109375, "learning_rate": 9.996435959318142e-07, "loss": -0.0052, "num_tokens": 24185236.0, "reward": 0.0, "reward_std": 0.9948856830596924, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06460028780854046, "rewards/wordcountpos_reward/raw_geo/std": 0.09920312753771221, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784837, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1087.125, "completions/mean_terminated_length": 1087.125, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.11182236447289458, "frac_reward_zero_std": 0.0, "grad_norm": 3.198950422525801, "kl": 0.003292083740234375, "learning_rate": 9.996309825537934e-07, "loss": 0.002, "num_tokens": 24226614.0, "reward": 0.0, "reward_std": 0.5553363561630249, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.022890796905422128, "rewards/wordcountpos_reward/raw_geo/std": 0.2262109657007015, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 999.3125, "completions/mean_terminated_length": 999.3125, "completions/min_length": 718.0, "completions/min_terminated_length": 718.0, "epoch": 0.11202240448089618, "frac_reward_zero_std": 0.0, "grad_norm": 3.258890908105495, "kl": 0.003223419189453125, "learning_rate": 9.996181499336637e-07, "loss": -0.0497, "num_tokens": 24264323.0, "reward": -2.60770320892334e-08, "reward_std": 1.063173532485962, "rewards/wordcountpos_reward/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11065609031325008, "rewards/wordcountpos_reward/raw_geo/std": 0.053565496049354094, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1237.875, "completions/mean_terminated_length": 1220.4000244140625, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.11222244448889777, "frac_reward_zero_std": 0.0, "grad_norm": 3.061252396367618, "kl": 0.0030460357666015625, "learning_rate": 9.996050980776829e-07, "loss": -0.0191, "num_tokens": 24305537.0, "reward": 0.0, "reward_std": 0.8597975969314575, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09832104803295717, "rewards/wordcountpos_reward/raw_geo/std": 0.08394843955418584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1164.0, "completions/max_terminated_length": 1164.0, "completions/mean_length": 904.0625, "completions/mean_terminated_length": 904.0625, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.11242248449689939, "frac_reward_zero_std": 0.0, "grad_norm": 3.376945163764293, "kl": 0.00327301025390625, "learning_rate": 9.995918269922143e-07, "loss": 0.0285, "num_tokens": 24342498.0, "reward": 0.0, "reward_std": 0.9232865571975708, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.028784287020987686, "rewards/wordcountpos_reward/raw_geo/std": 0.059373433572404787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1143.3125, "completions/mean_terminated_length": 1143.3125, "completions/min_length": 809.0, "completions/min_terminated_length": 809.0, "epoch": 0.11262252450490098, "frac_reward_zero_std": 0.0, "grad_norm": 1.8176050600575773, "kl": 0.0019559860229492188, "learning_rate": 9.995783366837291e-07, "loss": -0.0135, "num_tokens": 24376855.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9699097871780396, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07519102397061327, "rewards/wordcountpos_reward/raw_geo/std": 0.11153401002158146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 1025.5, "completions/mean_terminated_length": 1025.5, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.11282256451290258, "frac_reward_zero_std": 0.0, "grad_norm": 3.8579631240466967, "kl": 0.00431060791015625, "learning_rate": 9.995646271588058e-07, "loss": -0.0266, "num_tokens": 24423711.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0112859010696411, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0684722708153597, "rewards/wordcountpos_reward/raw_geo/std": 0.07208256233553126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.14298407059684815, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1121.0625, "completions/mean_terminated_length": 1095.800048828125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.11302260452090418, "frac_reward_zero_std": 0.0, "grad_norm": 3.2000097141700103, "kl": 0.00372314453125, "learning_rate": 9.995506984241287e-07, "loss": -0.0458, "num_tokens": 24464112.0, "reward": 0.0, "reward_std": 0.9509493708610535, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.023468870239363615, "rewards/wordcountpos_reward/raw_geo/std": 0.07889781303655741, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1210.75, "completions/mean_terminated_length": 1210.75, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.11322264452890578, "frac_reward_zero_std": 0.0, "grad_norm": 3.530942414378748, "kl": 0.004276275634765625, "learning_rate": 9.995365504864897e-07, "loss": 0.0087, "num_tokens": 24513772.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0324478149414062, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09899354500667781, "rewards/wordcountpos_reward/raw_geo/std": 0.0843227690429665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1286.5625, "completions/mean_terminated_length": 1256.071533203125, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.11342268453690738, "frac_reward_zero_std": 0.0, "grad_norm": 2.989295942963754, "kl": 0.00418853759765625, "learning_rate": 9.995221833527873e-07, "loss": -0.0365, "num_tokens": 24563717.0, "reward": 0.0, "reward_std": 0.6273993849754333, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02518059984630949, "rewards/wordcountpos_reward/raw_geo/std": 0.06487796151302058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1210.5, "completions/mean_terminated_length": 1191.2000732421875, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.11362272454490899, "frac_reward_zero_std": 0.0, "grad_norm": 2.833800929068872, "kl": 0.0024013519287109375, "learning_rate": 9.995075970300272e-07, "loss": 0.0354, "num_tokens": 24605309.0, "reward": 0.0, "reward_std": 0.8578473329544067, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04966074312154036, "rewards/wordcountpos_reward/raw_geo/std": 0.06503495016918011, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1147.9375, "completions/mean_terminated_length": 1124.4666748046875, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.11382276455291059, "frac_reward_zero_std": 0.0, "grad_norm": 2.7935809710912016, "kl": 0.00290679931640625, "learning_rate": 9.994927915253217e-07, "loss": 0.0203, "num_tokens": 24635852.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9936357736587524, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00745102399672297, "rewards/wordcountpos_reward/raw_geo/std": 0.08224213880335354, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1327.125, "completions/mean_terminated_length": 1223.4000244140625, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.11402280456091218, "frac_reward_zero_std": 0.0, "grad_norm": 2.654950700401816, "kl": 0.003543853759765625, "learning_rate": 9.994777668458896e-07, "loss": -0.0756, "num_tokens": 24692286.0, "reward": 0.0, "reward_std": 0.8973362445831299, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11054539254611581, "rewards/wordcountpos_reward/raw_geo/std": 0.12082637303706929, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1115.5, "completions/mean_terminated_length": 1115.5, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.11422284456891378, "frac_reward_zero_std": 0.0, "grad_norm": 2.9263528466020903, "kl": 0.003662109375, "learning_rate": 9.994625229990577e-07, "loss": -0.0472, "num_tokens": 24732222.0, "reward": 0.0, "reward_std": 0.8776353597640991, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05033152705920437, "rewards/wordcountpos_reward/raw_geo/std": 0.07089268646780239, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1213.0, "completions/max_terminated_length": 1213.0, "completions/mean_length": 993.4375, "completions/mean_terminated_length": 993.4375, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.11442288457691538, "frac_reward_zero_std": 0.0, "grad_norm": 3.159205315874274, "kl": 0.003131866455078125, "learning_rate": 9.994470599922585e-07, "loss": -0.023, "num_tokens": 24780205.0, "reward": 0.0, "reward_std": 0.9990221261978149, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04287313775383517, "rewards/wordcountpos_reward/raw_geo/std": 0.11187219434223016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1160.5625, "completions/mean_terminated_length": 1160.5625, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.11462292458491698, "frac_reward_zero_std": 0.0, "grad_norm": 2.8619545402282514, "kl": 0.0032501220703125, "learning_rate": 9.99431377833032e-07, "loss": 0.0108, "num_tokens": 24828518.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8064454793930054, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04616483808873933, "rewards/wordcountpos_reward/raw_geo/std": 0.0556774454688619, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1161.375, "completions/mean_terminated_length": 1161.375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.11482296459291859, "frac_reward_zero_std": 0.0, "grad_norm": 3.3056665248023487, "kl": 0.00386810302734375, "learning_rate": 9.994154765290247e-07, "loss": -0.0192, "num_tokens": 24869836.0, "reward": 0.0, "reward_std": 0.8694266080856323, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07475566030167846, "rewards/wordcountpos_reward/raw_geo/std": 0.08803931553654668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1188.9375, "completions/mean_terminated_length": 1188.9375, "completions/min_length": 1009.0, "completions/min_terminated_length": 1009.0, "epoch": 0.11502300460092019, "frac_reward_zero_std": 0.0, "grad_norm": 2.2711858783873935, "kl": 0.0024394989013671875, "learning_rate": 9.993993560879905e-07, "loss": -0.032, "num_tokens": 24908227.0, "reward": 0.0, "reward_std": 0.748092770576477, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.024501221022122377, "rewards/wordcountpos_reward/raw_geo/std": 0.09323710150970747, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1076.0, "completions/max_terminated_length": 1076.0, "completions/mean_length": 938.0625, "completions/mean_terminated_length": 938.0625, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.11522304460892178, "frac_reward_zero_std": 0.0, "grad_norm": 3.2028378879940096, "kl": 0.002849578857421875, "learning_rate": 9.993830165177895e-07, "loss": -0.0096, "num_tokens": 24947052.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9939234256744385, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05664545438746776, "rewards/wordcountpos_reward/raw_geo/std": 0.07679814285615115, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.16771890063326086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1175.25, "completions/mean_terminated_length": 1175.25, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.11542308461692338, "frac_reward_zero_std": 0.0, "grad_norm": 3.3913256548708586, "kl": 0.004215240478515625, "learning_rate": 9.99366457826389e-07, "loss": 0.0005, "num_tokens": 24990904.0, "reward": 0.0, "reward_std": 1.0443048477172852, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0884725822662143, "rewards/wordcountpos_reward/raw_geo/std": 0.10828950413444931, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 991.875, "completions/mean_terminated_length": 991.875, "completions/min_length": 726.0, "completions/min_terminated_length": 726.0, "epoch": 0.11562312462492498, "frac_reward_zero_std": 0.0, "grad_norm": 3.910274966700335, "kl": 0.00441741943359375, "learning_rate": 9.99349680021863e-07, "loss": 0.0065, "num_tokens": 25027550.0, "reward": 0.0, "reward_std": 0.837898313999176, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.017382526699437066, "rewards/wordcountpos_reward/raw_geo/std": 0.1234621302531474, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12995725793078622, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1044.6875, "completions/mean_terminated_length": 979.6428833007812, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.11582316463292658, "frac_reward_zero_std": 0.0, "grad_norm": 3.353134539448895, "kl": 0.0036258697509765625, "learning_rate": 9.993326831123928e-07, "loss": -0.0683, "num_tokens": 25071753.0, "reward": 0.0, "reward_std": 0.44138485193252563, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03922920221087807, "rewards/wordcountpos_reward/raw_geo/std": 0.2234370592356771, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620105, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1052.5, "completions/mean_terminated_length": 1052.5, "completions/min_length": 653.0, "completions/min_terminated_length": 653.0, "epoch": 0.11602320464092819, "frac_reward_zero_std": 0.0, "grad_norm": 3.600139110354816, "kl": 0.004913330078125, "learning_rate": 9.993154671062658e-07, "loss": -0.0378, "num_tokens": 25108041.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0393924713134766, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1792920419318415, "rewards/wordcountpos_reward/raw_geo/std": 0.1689911534755474, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1076.9375, "completions/mean_terminated_length": 1076.9375, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.11622324464892979, "frac_reward_zero_std": 0.0, "grad_norm": 3.8589304905133766, "kl": 0.0048980712890625, "learning_rate": 9.992980320118768e-07, "loss": -0.0393, "num_tokens": 25159096.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8237398266792297, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07778217797664813, "rewards/wordcountpos_reward/raw_geo/std": 0.18268940884829016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.6916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1352.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1030.875, "completions/mean_terminated_length": 1030.875, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.11642328465693139, "frac_reward_zero_std": 0.0, "grad_norm": 2.8635549911104263, "kl": 0.0033092498779296875, "learning_rate": 9.992803778377272e-07, "loss": -0.0391, "num_tokens": 25194710.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9984513521194458, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07497094598922657, "rewards/wordcountpos_reward/raw_geo/std": 0.13422377795413132, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1193.0, "completions/mean_length": 1041.375, "completions/mean_terminated_length": 1010.800048828125, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.11662332466493298, "frac_reward_zero_std": 0.0, "grad_norm": 2.775301710796813, "kl": 0.0035247802734375, "learning_rate": 9.99262504592425e-07, "loss": 0.0319, "num_tokens": 25227452.0, "reward": 2.9802322387695312e-08, "reward_std": 0.68071448802948, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06916690320467472, "rewards/wordcountpos_reward/raw_geo/std": 0.1174951869531071, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 990.5625, "completions/mean_terminated_length": 990.5625, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.11682336467293458, "frac_reward_zero_std": 0.0, "grad_norm": 2.995364025673235, "kl": 0.00260162353515625, "learning_rate": 9.99244412284686e-07, "loss": -0.0264, "num_tokens": 25269061.0, "reward": 0.0, "reward_std": 1.0686452388763428, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061970819933237935, "rewards/wordcountpos_reward/raw_geo/std": 0.06606573332844305, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1233.0625, "completions/mean_terminated_length": 1215.2667236328125, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.1170234046809362, "frac_reward_zero_std": 0.0, "grad_norm": 3.3124435602370617, "kl": 0.004154205322265625, "learning_rate": 9.99226100923331e-07, "loss": -0.0023, "num_tokens": 25317702.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0391209125518799, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13717781753963867, "rewards/wordcountpos_reward/raw_geo/std": 0.06388630839861507, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1066.625, "completions/mean_terminated_length": 1066.625, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.11722344468893779, "frac_reward_zero_std": 0.0, "grad_norm": 3.5779085405472486, "kl": 0.00460052490234375, "learning_rate": 9.992075705172898e-07, "loss": 0.0003, "num_tokens": 25361648.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0536937713623047, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04481952401741872, "rewards/wordcountpos_reward/raw_geo/std": 0.09611909117815925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1135.125, "completions/mean_terminated_length": 1135.125, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.11742348469693939, "frac_reward_zero_std": 0.0, "grad_norm": 2.513543984785798, "kl": 0.00205230712890625, "learning_rate": 9.991888210755972e-07, "loss": 0.0057, "num_tokens": 25409746.0, "reward": 7.450580596923828e-09, "reward_std": 1.0209201574325562, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10751887698938156, "rewards/wordcountpos_reward/raw_geo/std": 0.08335179521983012, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1030.625, "completions/mean_terminated_length": 1030.625, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.11762352470494099, "frac_reward_zero_std": 0.0, "grad_norm": 2.6230283885135584, "kl": 0.00194549560546875, "learning_rate": 9.991698526073957e-07, "loss": 0.0075, "num_tokens": 25441860.0, "reward": 1.1175870895385742e-08, "reward_std": 0.9877455234527588, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11025377591357817, "rewards/wordcountpos_reward/raw_geo/std": 0.15686384576746407, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1208.5625, "completions/mean_terminated_length": 1208.5625, "completions/min_length": 1017.0, "completions/min_terminated_length": 1017.0, "epoch": 0.11782356471294259, "frac_reward_zero_std": 0.0, "grad_norm": 2.848728275486942, "kl": 0.0029144287109375, "learning_rate": 9.991506651219344e-07, "loss": 0.0081, "num_tokens": 25491981.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7379999160766602, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11822403131869448, "rewards/wordcountpos_reward/raw_geo/std": 0.2325377137103034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 986.625, "completions/mean_terminated_length": 868.1538696289062, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.11802360472094418, "frac_reward_zero_std": 0.0, "grad_norm": 2.8757895426961873, "kl": 0.003429412841796875, "learning_rate": 9.991312586285694e-07, "loss": -0.1393, "num_tokens": 25532535.0, "reward": 0.0, "reward_std": 0.796451985836029, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06713262369187364, "rewards/wordcountpos_reward/raw_geo/std": 0.05512106823014079, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.17384539747207065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1209.0, "completions/max_terminated_length": 1209.0, "completions/mean_length": 951.8125, "completions/mean_terminated_length": 951.8125, "completions/min_length": 578.0, "completions/min_terminated_length": 578.0, "epoch": 0.1182236447289458, "frac_reward_zero_std": 0.0, "grad_norm": 2.554507410650862, "kl": 0.002689361572265625, "learning_rate": 9.99111633136763e-07, "loss": -0.0778, "num_tokens": 25563276.0, "reward": 0.0, "reward_std": 0.8934731483459473, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10302287957460166, "rewards/wordcountpos_reward/raw_geo/std": 0.13524604673773902, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14900907255500823, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1160.8125, "completions/mean_terminated_length": 1160.8125, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.1184236847369474, "frac_reward_zero_std": 0.0, "grad_norm": 3.4125326419007647, "kl": 0.004535675048828125, "learning_rate": 9.99091788656085e-07, "loss": 0.0118, "num_tokens": 25613153.0, "reward": 0.0, "reward_std": 0.896718442440033, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13907460245307837, "rewards/wordcountpos_reward/raw_geo/std": 0.06629415874490989, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1065.6875, "completions/mean_terminated_length": 1065.6875, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.11862372474494899, "frac_reward_zero_std": 0.0, "grad_norm": 3.77619782251697, "kl": 0.0047760009765625, "learning_rate": 9.990717251962117e-07, "loss": 0.0018, "num_tokens": 25660716.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9303244352340698, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2007007850778933, "rewards/wordcountpos_reward/raw_geo/std": 0.12608709940807936, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1038.875, "completions/mean_terminated_length": 1038.875, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.11882376475295059, "frac_reward_zero_std": 0.0, "grad_norm": 3.2705632132229274, "kl": 0.003604888916015625, "learning_rate": 9.990514427669258e-07, "loss": 0.0016, "num_tokens": 25699530.0, "reward": 0.0, "reward_std": 0.8749842643737793, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014573999936885611, "rewards/wordcountpos_reward/raw_geo/std": 0.12330494898387871, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1408308678285174, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1289.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 1070.8125, "completions/mean_terminated_length": 1070.8125, "completions/min_length": 797.0, "completions/min_terminated_length": 797.0, "epoch": 0.11902380476095219, "frac_reward_zero_std": 0.0, "grad_norm": 3.613535943423098, "kl": 0.00507354736328125, "learning_rate": 9.990309413781174e-07, "loss": 0.0076, "num_tokens": 25740295.0, "reward": 5.960464477539063e-08, "reward_std": 0.8166874647140503, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07473252236098153, "rewards/wordcountpos_reward/raw_geo/std": 0.09498954456399569, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15371932093796678, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 1032.375, "completions/mean_terminated_length": 1032.375, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.11922384476895379, "frac_reward_zero_std": 0.0, "grad_norm": 3.8786717402680106, "kl": 0.0048828125, "learning_rate": 9.99010221039783e-07, "loss": -0.0254, "num_tokens": 25786157.0, "reward": 0.0, "reward_std": 0.721498966217041, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10591279427986157, "rewards/wordcountpos_reward/raw_geo/std": 0.27952419364077286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1087.375, "completions/mean_terminated_length": 1087.375, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.1194238847769554, "frac_reward_zero_std": 0.0, "grad_norm": 3.649881414576622, "kl": 0.00440216064453125, "learning_rate": 9.989892817620258e-07, "loss": 0.0169, "num_tokens": 25828275.0, "reward": 0.0, "reward_std": 1.045090675354004, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1728914810093765, "rewards/wordcountpos_reward/raw_geo/std": 0.10331252127508549, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982529, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1155.0625, "completions/mean_terminated_length": 1155.0625, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.119623924784957, "frac_reward_zero_std": 0.0, "grad_norm": 3.1587583548670577, "kl": 0.003627777099609375, "learning_rate": 9.989681235550562e-07, "loss": -0.0077, "num_tokens": 25870700.0, "reward": 0.0, "reward_std": 0.5376613140106201, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10627275215566025, "rewards/wordcountpos_reward/raw_geo/std": 0.25543361580053725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1046.0, "completions/max_terminated_length": 1046.0, "completions/mean_length": 940.25, "completions/mean_terminated_length": 940.25, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.11982396479295859, "frac_reward_zero_std": 0.0, "grad_norm": 1.6594137725287148, "kl": 0.0004929304122924805, "learning_rate": 9.989467464291908e-07, "loss": 0.0172, "num_tokens": 25902776.0, "reward": 0.0, "reward_std": 0.9253208637237549, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10106021719825463, "rewards/wordcountpos_reward/raw_geo/std": 0.12120340532914584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 1019.5, "completions/mean_terminated_length": 1019.5, "completions/min_length": 778.0, "completions/min_terminated_length": 778.0, "epoch": 0.12002400480096019, "frac_reward_zero_std": 0.0, "grad_norm": 3.3025065823842503, "kl": 0.00357818603515625, "learning_rate": 9.989251503948531e-07, "loss": -0.0314, "num_tokens": 25941464.0, "reward": -4.470348358154297e-08, "reward_std": 1.0555447340011597, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0028509327567583483, "rewards/wordcountpos_reward/raw_geo/std": 0.11621934984659056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1096.0, "completions/max_terminated_length": 1096.0, "completions/mean_length": 859.3125, "completions/mean_terminated_length": 859.3125, "completions/min_length": 619.0, "completions/min_terminated_length": 619.0, "epoch": 0.12022404480896179, "frac_reward_zero_std": 0.0, "grad_norm": 1.8393055690737452, "kl": 0.0011968612670898438, "learning_rate": 9.989033354625734e-07, "loss": -0.0094, "num_tokens": 25973277.0, "reward": 5.960464477539063e-08, "reward_std": 0.4579020142555237, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08404742083151685, "rewards/wordcountpos_reward/raw_geo/std": 0.24157590795669093, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1268.5, "completions/mean_terminated_length": 1163.272705078125, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.12042408481696339, "frac_reward_zero_std": 0.0, "grad_norm": 3.336937062895583, "kl": 0.004791259765625, "learning_rate": 9.988813016429892e-07, "loss": 0.0778, "num_tokens": 26018461.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7661669254302979, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09766727093301909, "rewards/wordcountpos_reward/raw_geo/std": 0.1446151190928726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792515, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1171.0, "completions/max_terminated_length": 1171.0, "completions/mean_length": 885.3125, "completions/mean_terminated_length": 885.3125, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.120624124824965, "frac_reward_zero_std": 0.0, "grad_norm": 4.169743733588036, "kl": 0.00424957275390625, "learning_rate": 9.98859048946844e-07, "loss": 0.0144, "num_tokens": 26068330.0, "reward": -7.450580596923828e-09, "reward_std": 1.0524903535842896, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.061496268225116744, "rewards/wordcountpos_reward/raw_geo/std": 0.24759325476198477, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1156.0, "completions/mean_length": 1240.0625, "completions/mean_terminated_length": 980.125, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.1208241648329666, "frac_reward_zero_std": 0.0, "grad_norm": 2.657923630667353, "kl": 0.0023164749145507812, "learning_rate": 9.98836577384988e-07, "loss": -0.0388, "num_tokens": 26114571.0, "reward": 0.0, "reward_std": 0.5082321166992188, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10912996498151603, "rewards/wordcountpos_reward/raw_geo/std": 0.10358461671892392, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1449776483411099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1093.5625, "completions/mean_terminated_length": 1066.4666748046875, "completions/min_length": 696.0, "completions/min_terminated_length": 696.0, "epoch": 0.1210242048409682, "frac_reward_zero_std": 0.0, "grad_norm": 3.063282695520789, "kl": 0.003543853759765625, "learning_rate": 9.98813886968379e-07, "loss": 0.0004, "num_tokens": 26165916.0, "reward": 0.0, "reward_std": 0.888217568397522, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06128346360775267, "rewards/wordcountpos_reward/raw_geo/std": 0.10585517944269796, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1161.0625, "completions/mean_terminated_length": 1138.4666748046875, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.12122424484896979, "frac_reward_zero_std": 0.0, "grad_norm": 2.4556025997089503, "kl": 0.00281524658203125, "learning_rate": 9.987909777080804e-07, "loss": -0.0251, "num_tokens": 26200245.0, "reward": -7.450580596923828e-09, "reward_std": 1.045180320739746, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.05133146450931896, "rewards/wordcountpos_reward/raw_geo/std": 0.0773202615145305, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1066.875, "completions/mean_terminated_length": 1038.0, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.12142428485697139, "frac_reward_zero_std": 0.0, "grad_norm": 3.5582325376975255, "kl": 0.004047393798828125, "learning_rate": 9.987678496152636e-07, "loss": -0.0373, "num_tokens": 26252051.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4290241599082947, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.028119446965989305, "rewards/wordcountpos_reward/raw_geo/std": 0.07862825389079174, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.15389991938004774, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1070.5625, "completions/mean_terminated_length": 1070.5625, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.12162432486497299, "frac_reward_zero_std": 0.0, "grad_norm": 3.5837425986678246, "kl": 0.00506591796875, "learning_rate": 9.987445027012051e-07, "loss": -0.0138, "num_tokens": 26291276.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0388023853302002, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0330544137889492, "rewards/wordcountpos_reward/raw_geo/std": 0.054684386172413016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1254.0, "completions/mean_terminated_length": 1254.0, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.1218243648729746, "frac_reward_zero_std": 0.0, "grad_norm": 2.8333074798072353, "kl": 0.003570556640625, "learning_rate": 9.987209369772897e-07, "loss": 0.0301, "num_tokens": 26335036.0, "reward": 0.0, "reward_std": 0.7927502989768982, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.28678040310294495, "rewards/wordcountpos_reward/raw_geo/std": 0.1652713501896006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 967.375, "completions/mean_terminated_length": 967.375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.1220244048809762, "frac_reward_zero_std": 0.0, "grad_norm": 1.6270450196842778, "kl": 0.001155853271484375, "learning_rate": 9.986971524550076e-07, "loss": -0.1561, "num_tokens": 26375154.0, "reward": 2.9802322387695312e-08, "reward_std": 1.042828917503357, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10004409930495685, "rewards/wordcountpos_reward/raw_geo/std": 0.03765844204343756, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.20294133434366, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1225.0, "completions/max_terminated_length": 1225.0, "completions/mean_length": 946.1875, "completions/mean_terminated_length": 946.1875, "completions/min_length": 653.0, "completions/min_terminated_length": 653.0, "epoch": 0.1222244448889778, "frac_reward_zero_std": 0.0, "grad_norm": 3.3084233070482325, "kl": 0.004467010498046875, "learning_rate": 9.986731491459567e-07, "loss": -0.064, "num_tokens": 26418821.0, "reward": -5.960464477539063e-08, "reward_std": 0.6977008581161499, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12472395408433459, "rewards/wordcountpos_reward/raw_geo/std": 0.12125465444713009, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620105, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 946.625, "completions/mean_terminated_length": 946.625, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.1224244848969794, "frac_reward_zero_std": 0.0, "grad_norm": 3.9088394766494114, "kl": 0.004638671875, "learning_rate": 9.986489270618406e-07, "loss": -0.0041, "num_tokens": 26460183.0, "reward": 0.0, "reward_std": 0.8044872879981995, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.004338921517332701, "rewards/wordcountpos_reward/raw_geo/std": 0.22816820582093225, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.0925962962222252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1213.375, "completions/mean_terminated_length": 1041.4000244140625, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.12262452490498099, "frac_reward_zero_std": 0.0, "grad_norm": 2.276849364455299, "kl": 0.003658294677734375, "learning_rate": 9.986244862144706e-07, "loss": -0.0062, "num_tokens": 26503469.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9733021259307861, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07596876596732975, "rewards/wordcountpos_reward/raw_geo/std": 0.08366904544231499, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 939.875, "completions/mean_terminated_length": 939.875, "completions/min_length": 663.0, "completions/min_terminated_length": 663.0, "epoch": 0.1228245649129826, "frac_reward_zero_std": 0.0, "grad_norm": 3.980765546849583, "kl": 0.00487518310546875, "learning_rate": 9.98599826615764e-07, "loss": 0.0105, "num_tokens": 26533035.0, "reward": 0.0, "reward_std": 0.6733291745185852, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014356009050411659, "rewards/wordcountpos_reward/raw_geo/std": 0.017854352485406063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921946, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1262.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 940.625, "completions/mean_terminated_length": 940.625, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.1230246049209842, "frac_reward_zero_std": 0.0, "grad_norm": 3.893811188225448, "kl": 0.0047454833984375, "learning_rate": 9.985749482777447e-07, "loss": 0.0027, "num_tokens": 26572165.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0360522270202637, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04120507683636414, "rewards/wordcountpos_reward/raw_geo/std": 0.1340279500130438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1051.125, "completions/mean_terminated_length": 1051.125, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.1232246449289858, "frac_reward_zero_std": 0.0, "grad_norm": 3.0674836647561516, "kl": 0.0035247802734375, "learning_rate": 9.985498512125438e-07, "loss": -0.055, "num_tokens": 26604031.0, "reward": 0.0, "reward_std": 0.9960306286811829, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06768566342256044, "rewards/wordcountpos_reward/raw_geo/std": 0.08527267630277444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1111.1875, "completions/mean_terminated_length": 1085.2667236328125, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.1234246849369874, "frac_reward_zero_std": 0.0, "grad_norm": 2.799652363512096, "kl": 0.003025054931640625, "learning_rate": 9.985245354323985e-07, "loss": 0.0033, "num_tokens": 26643394.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0629085302352905, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03913411351888386, "rewards/wordcountpos_reward/raw_geo/std": 0.04767410521977387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1163.6875, "completions/mean_terminated_length": 1086.076904296875, "completions/min_length": 701.0, "completions/min_terminated_length": 701.0, "epoch": 0.123624724944989, "frac_reward_zero_std": 0.0, "grad_norm": 3.364450538142126, "kl": 0.00482940673828125, "learning_rate": 9.984990009496531e-07, "loss": 0.0129, "num_tokens": 26687109.0, "reward": -7.450580596923828e-09, "reward_std": 1.063175082206726, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.11514919795469915, "rewards/wordcountpos_reward/raw_geo/std": 0.06422640611243444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1176.875, "completions/mean_terminated_length": 1155.3333740234375, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.12382476495299059, "frac_reward_zero_std": 0.0, "grad_norm": 3.289606833352297, "kl": 0.00428009033203125, "learning_rate": 9.984732477767583e-07, "loss": 0.0533, "num_tokens": 26733875.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6770787239074707, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17012176081642488, "rewards/wordcountpos_reward/raw_geo/std": 0.08161800095832152, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505566, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1006.125, "completions/mean_terminated_length": 1006.125, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.1240248049609922, "frac_reward_zero_std": 0.0, "grad_norm": 3.76935441621033, "kl": 0.005126953125, "learning_rate": 9.984472759262715e-07, "loss": 0.0109, "num_tokens": 26768773.0, "reward": -1.862645149230957e-08, "reward_std": 1.0479369163513184, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04200083852984359, "rewards/wordcountpos_reward/raw_geo/std": 0.07508393691671464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.16141733350404336, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1205.9375, "completions/mean_terminated_length": 1163.9285888671875, "completions/min_length": 738.0, "completions/min_terminated_length": 738.0, "epoch": 0.1242248449689938, "frac_reward_zero_std": 0.0, "grad_norm": 3.0235433804391216, "kl": 0.004535675048828125, "learning_rate": 9.984210854108563e-07, "loss": -0.045, "num_tokens": 26818532.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8869249820709229, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04740029056353276, "rewards/wordcountpos_reward/raw_geo/std": 0.18896733059278176, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 987.8125, "completions/mean_terminated_length": 987.8125, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.1244248849769954, "frac_reward_zero_std": 0.0, "grad_norm": 3.6696230262281166, "kl": 0.0048065185546875, "learning_rate": 9.98394676243284e-07, "loss": 0.0263, "num_tokens": 26852241.0, "reward": -5.960464477539063e-08, "reward_std": 0.9135434031486511, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02709717430519235, "rewards/wordcountpos_reward/raw_geo/std": 0.03719682286016161, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982526, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1036.4375, "completions/mean_terminated_length": 1036.4375, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.124624924984997, "frac_reward_zero_std": 0.0, "grad_norm": 3.4522917928057533, "kl": 0.004734039306640625, "learning_rate": 9.983680484364312e-07, "loss": -0.0031, "num_tokens": 26901280.0, "reward": 0.0, "reward_std": 0.6508069038391113, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06533817396710875, "rewards/wordcountpos_reward/raw_geo/std": 0.36540788595245494, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7, "rewards/wordcountpos_reward/raw_rule/std": 0.16865480854231357, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1118.0, "completions/max_terminated_length": 1118.0, "completions/mean_length": 918.0625, "completions/mean_terminated_length": 918.0625, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.1248249649929986, "frac_reward_zero_std": 0.0, "grad_norm": 2.579914600881425, "kl": 0.003475189208984375, "learning_rate": 9.98341202003282e-07, "loss": -0.0463, "num_tokens": 26940993.0, "reward": -2.9802322387695312e-08, "reward_std": 0.44443702697753906, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0626511853748225, "rewards/wordcountpos_reward/raw_geo/std": 0.13308606533254844, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1046.75, "completions/mean_terminated_length": 1046.75, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.1250250050010002, "frac_reward_zero_std": 0.0, "grad_norm": 3.401655397624474, "kl": 0.0059356689453125, "learning_rate": 9.983141369569269e-07, "loss": 0.0306, "num_tokens": 26987189.0, "reward": 0.0, "reward_std": 0.5568276643753052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18044071231883343, "rewards/wordcountpos_reward/raw_geo/std": 0.08709184195541693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1070.125, "completions/mean_terminated_length": 1070.125, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.1252250450090018, "frac_reward_zero_std": 0.0, "grad_norm": 2.698697801681276, "kl": 0.003925323486328125, "learning_rate": 9.982868533105628e-07, "loss": 0.0315, "num_tokens": 27033975.0, "reward": 0.0, "reward_std": 0.909441351890564, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05037267939674402, "rewards/wordcountpos_reward/raw_geo/std": 0.06662628101123586, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1171.8125, "completions/mean_terminated_length": 1149.933349609375, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.1254250850170034, "frac_reward_zero_std": 0.0, "grad_norm": 3.158058147482287, "kl": 0.0053558349609375, "learning_rate": 9.982593510774934e-07, "loss": -0.0165, "num_tokens": 27086668.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5071286559104919, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05255425439892183, "rewards/wordcountpos_reward/raw_geo/std": 0.07159454359449532, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619461, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1012.8125, "completions/mean_terminated_length": 1012.8125, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.125625125025005, "frac_reward_zero_std": 0.0, "grad_norm": 2.783341504357864, "kl": 0.003597259521484375, "learning_rate": 9.98231630271129e-07, "loss": -0.0026, "num_tokens": 27124089.0, "reward": 3.725290298461914e-09, "reward_std": 1.0599117279052734, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.02447395541263964, "rewards/wordcountpos_reward/raw_geo/std": 0.112989011009317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195308, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1051.75, "completions/mean_terminated_length": 1051.75, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.12582516503300661, "frac_reward_zero_std": 0.0, "grad_norm": 3.1794291553351517, "kl": 0.00438690185546875, "learning_rate": 9.982036909049862e-07, "loss": -0.0209, "num_tokens": 27164317.0, "reward": 0.0, "reward_std": 0.8919508457183838, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0604702040709929, "rewards/wordcountpos_reward/raw_geo/std": 0.049235970855206486, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1026.9375, "completions/mean_terminated_length": 1026.9375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.1260252050410082, "frac_reward_zero_std": 0.0, "grad_norm": 3.4953474271926464, "kl": 0.0059356689453125, "learning_rate": 9.981755329926885e-07, "loss": -0.159, "num_tokens": 27205148.0, "reward": 0.0, "reward_std": 0.897917628288269, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10790208297456225, "rewards/wordcountpos_reward/raw_geo/std": 0.06736836454728154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16771890063326086, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1211.9375, "completions/mean_terminated_length": 1192.7333984375, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.1262252450490098, "frac_reward_zero_std": 0.0, "grad_norm": 3.185263780268412, "kl": 0.004650115966796875, "learning_rate": 9.981471565479657e-07, "loss": 0.0485, "num_tokens": 27257843.0, "reward": -4.470348358154297e-08, "reward_std": 0.9278661012649536, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0111015291209416, "rewards/wordcountpos_reward/raw_geo/std": 0.08132531991087623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1236.3125, "completions/mean_terminated_length": 1198.6429443359375, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.1264252850570114, "frac_reward_zero_std": 0.0, "grad_norm": 2.836985042513744, "kl": 0.0047607421875, "learning_rate": 9.981185615846547e-07, "loss": -0.0114, "num_tokens": 27300824.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0083012580871582, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07239144298136019, "rewards/wordcountpos_reward/raw_geo/std": 0.14773738321392313, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1191.0625, "completions/mean_terminated_length": 1050.6363525390625, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.126625325065013, "frac_reward_zero_std": 0.0, "grad_norm": 3.0724060569436813, "kl": 0.00531768798828125, "learning_rate": 9.980897481166977e-07, "loss": -0.0809, "num_tokens": 27350177.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7770811319351196, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0933141414397164, "rewards/wordcountpos_reward/raw_geo/std": 0.16391916675149165, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1212.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 1102.4375, "completions/mean_terminated_length": 1102.4375, "completions/min_length": 974.0, "completions/min_terminated_length": 974.0, "epoch": 0.1268253650730146, "frac_reward_zero_std": 0.0, "grad_norm": 3.1225414018257385, "kl": 0.003955841064453125, "learning_rate": 9.980607161581453e-07, "loss": 0.0015, "num_tokens": 27387200.0, "reward": -7.450580596923828e-09, "reward_std": 1.0623854398727417, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.018069248445128507, "rewards/wordcountpos_reward/raw_geo/std": 0.0804868379396318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1200.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 989.625, "completions/mean_terminated_length": 989.625, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.1270254050810162, "frac_reward_zero_std": 0.0, "grad_norm": 2.3134785508629094, "kl": 0.0017547607421875, "learning_rate": 9.980314657231528e-07, "loss": -0.0176, "num_tokens": 27417098.0, "reward": -2.9802322387695312e-08, "reward_std": 0.38615089654922485, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015131948342750749, "rewards/wordcountpos_reward/raw_geo/std": 0.20097860617405225, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923412, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1057.25, "completions/mean_terminated_length": 1057.25, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.1272254450890178, "frac_reward_zero_std": 0.0, "grad_norm": 3.2924162942987945, "kl": 0.003551483154296875, "learning_rate": 9.980019968259832e-07, "loss": 0.0028, "num_tokens": 27453838.0, "reward": 0.0, "reward_std": 0.9801968336105347, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16367684594306642, "rewards/wordcountpos_reward/raw_geo/std": 0.15655037214855191, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1082.375, "completions/mean_terminated_length": 1082.375, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.1274254850970194, "frac_reward_zero_std": 0.0, "grad_norm": 3.302973903215818, "kl": 0.00485992431640625, "learning_rate": 9.979723094810057e-07, "loss": -0.0025, "num_tokens": 27489916.0, "reward": 0.0, "reward_std": 0.8288685083389282, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08162939565676491, "rewards/wordcountpos_reward/raw_geo/std": 0.2209381204447789, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1168.875, "completions/mean_terminated_length": 1058.5, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.127625525105021, "frac_reward_zero_std": 0.0, "grad_norm": 3.1607709508175716, "kl": 0.004749298095703125, "learning_rate": 9.979424037026958e-07, "loss": 0.0638, "num_tokens": 27536842.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0416853427886963, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12464990888913079, "rewards/wordcountpos_reward/raw_geo/std": 0.14783292213404392, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1040.9375, "completions/mean_terminated_length": 1040.9375, "completions/min_length": 710.0, "completions/min_terminated_length": 710.0, "epoch": 0.1278255651130226, "frac_reward_zero_std": 0.0, "grad_norm": 3.5474317398854116, "kl": 0.00756072998046875, "learning_rate": 9.979122795056359e-07, "loss": -0.0581, "num_tokens": 27585081.0, "reward": -7.450580596923828e-09, "reward_std": 1.0642361640930176, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.04133626728577251, "rewards/wordcountpos_reward/raw_geo/std": 0.22301695708502775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12412657816683505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1042.25, "completions/mean_terminated_length": 1042.25, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.1280256051210242, "frac_reward_zero_std": 0.0, "grad_norm": 3.1010985284945747, "kl": 0.00457000732421875, "learning_rate": 9.978819369045144e-07, "loss": -0.0277, "num_tokens": 27634117.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9609041810035706, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08407491682913257, "rewards/wordcountpos_reward/raw_geo/std": 0.07843184608381397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387149, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1099.0, "completions/max_terminated_length": 1099.0, "completions/mean_length": 802.6875, "completions/mean_terminated_length": 802.6875, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.12822564512902582, "frac_reward_zero_std": 0.0, "grad_norm": 3.3952886007573824, "kl": 0.00311279296875, "learning_rate": 9.978513759141268e-07, "loss": 0.0302, "num_tokens": 27673512.0, "reward": 0.0, "reward_std": 0.9679796099662781, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061750412664388535, "rewards/wordcountpos_reward/raw_geo/std": 0.048486349365749956, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.20471298788158854, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1276.6875, "completions/mean_terminated_length": 1202.25, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.1284256851370274, "frac_reward_zero_std": 0.0, "grad_norm": 3.3113531173578883, "kl": 0.0063934326171875, "learning_rate": 9.978205965493745e-07, "loss": 0.0183, "num_tokens": 27721371.0, "reward": 0.0, "reward_std": 0.8945666551589966, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10797679995523339, "rewards/wordcountpos_reward/raw_geo/std": 0.08869039820513923, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1192569587999888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1122.875, "completions/mean_terminated_length": 1097.7333984375, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.128625725145029, "frac_reward_zero_std": 0.0, "grad_norm": 3.246690819953855, "kl": 0.00644683837890625, "learning_rate": 9.97789598825266e-07, "loss": 0.0141, "num_tokens": 27770041.0, "reward": 0.0, "reward_std": 0.9412751197814941, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10239796086178213, "rewards/wordcountpos_reward/raw_geo/std": 0.1343962627298669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477447, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1061.9375, "completions/mean_terminated_length": 1032.7333984375, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.1288257651530306, "frac_reward_zero_std": 0.0, "grad_norm": 3.453987658257602, "kl": 0.00827789306640625, "learning_rate": 9.977583827569156e-07, "loss": 0.0144, "num_tokens": 27822056.0, "reward": 2.9802322387695312e-08, "reward_std": 0.3767787218093872, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07131612583711619, "rewards/wordcountpos_reward/raw_geo/std": 0.07171287484603306, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14375905768565217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1108.3125, "completions/mean_terminated_length": 1108.3125, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.1290258051610322, "frac_reward_zero_std": 0.0, "grad_norm": 3.327779265101733, "kl": 0.00616455078125, "learning_rate": 9.977269483595446e-07, "loss": -0.0291, "num_tokens": 27868237.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9169150590896606, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.021763183621388668, "rewards/wordcountpos_reward/raw_geo/std": 0.33525742826908483, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1223.375, "completions/mean_terminated_length": 1204.933349609375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.12922584516903382, "frac_reward_zero_std": 0.0, "grad_norm": 2.570055547889121, "kl": 0.0032806396484375, "learning_rate": 9.976952956484806e-07, "loss": -0.0291, "num_tokens": 27915787.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9806604385375977, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054030268740384677, "rewards/wordcountpos_reward/raw_geo/std": 0.13011261092611223, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043478, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1073.0, "completions/mean_terminated_length": 1073.0, "completions/min_length": 692.0, "completions/min_terminated_length": 692.0, "epoch": 0.1294258851770354, "frac_reward_zero_std": 0.0, "grad_norm": 2.941630411573727, "kl": 0.003917694091796875, "learning_rate": 9.976634246391574e-07, "loss": -0.0228, "num_tokens": 27957611.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8614913821220398, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.3734023307367399, "rewards/wordcountpos_reward/raw_geo/std": 0.1143456490050215, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05900408021045224, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1312.0, "completions/mean_terminated_length": 1299.4666748046875, "completions/min_length": 1106.0, "completions/min_terminated_length": 1106.0, "epoch": 0.12962592518503702, "frac_reward_zero_std": 0.0, "grad_norm": 1.7540730861096523, "kl": 0.002410888671875, "learning_rate": 9.976313353471158e-07, "loss": 0.007, "num_tokens": 28003027.0, "reward": 0.0, "reward_std": 0.8530524969100952, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17744547359621543, "rewards/wordcountpos_reward/raw_geo/std": 0.129647648986365, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1143.625, "completions/mean_terminated_length": 1119.86669921875, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.1298259651930386, "frac_reward_zero_std": 0.0, "grad_norm": 3.400985168708016, "kl": 0.00530242919921875, "learning_rate": 9.975990277880021e-07, "loss": -0.0, "num_tokens": 28051293.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0485291481018066, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10346131727919117, "rewards/wordcountpos_reward/raw_geo/std": 0.17460243592844008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1212.0, "completions/mean_terminated_length": 1170.857177734375, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.1300260052010402, "frac_reward_zero_std": 0.0, "grad_norm": 3.3510074609565645, "kl": 0.00614166259765625, "learning_rate": 9.9756650197757e-07, "loss": 0.0712, "num_tokens": 28102165.0, "reward": -7.450580596923828e-09, "reward_std": 1.0400404930114746, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06826006684558153, "rewards/wordcountpos_reward/raw_geo/std": 0.12794528340479538, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1095.5625, "completions/mean_terminated_length": 1095.5625, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.1302260452090418, "frac_reward_zero_std": 0.0, "grad_norm": 3.075463854845879, "kl": 0.004222869873046875, "learning_rate": 9.975337579316792e-07, "loss": -0.0176, "num_tokens": 28146190.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0490224361419678, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15050571812914165, "rewards/wordcountpos_reward/raw_geo/std": 0.06928872750146535, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1085.125, "completions/mean_terminated_length": 1085.125, "completions/min_length": 954.0, "completions/min_terminated_length": 954.0, "epoch": 0.1304260852170434, "frac_reward_zero_std": 0.0, "grad_norm": 2.1562108504239106, "kl": 0.00295257568359375, "learning_rate": 9.975007956662958e-07, "loss": -0.0086, "num_tokens": 28186128.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9152057766914368, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.035800482873339694, "rewards/wordcountpos_reward/raw_geo/std": 0.05670761202596771, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1092.0, "completions/max_terminated_length": 1092.0, "completions/mean_length": 893.375, "completions/mean_terminated_length": 893.375, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.13062612522504502, "frac_reward_zero_std": 0.0, "grad_norm": 3.2702118853201094, "kl": 0.004428863525390625, "learning_rate": 9.974676151974924e-07, "loss": -0.084, "num_tokens": 28232990.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9846186637878418, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0414241074887866, "rewards/wordcountpos_reward/raw_geo/std": 0.04514410942858959, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.13109227736669002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1196.0625, "completions/mean_terminated_length": 1094.75, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.1308261652330466, "frac_reward_zero_std": 0.0, "grad_norm": 3.3719955995987636, "kl": 0.004749298095703125, "learning_rate": 9.97434216541448e-07, "loss": 0.0341, "num_tokens": 28277695.0, "reward": 0.0, "reward_std": 0.6445549726486206, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06249388830725872, "rewards/wordcountpos_reward/raw_geo/std": 0.2274145114862602, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14089659985908765, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1195.0, "completions/mean_terminated_length": 1195.0, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.13102620524104822, "frac_reward_zero_std": 0.0, "grad_norm": 2.5589524029496737, "kl": 0.003894805908203125, "learning_rate": 9.974005997144479e-07, "loss": -0.0219, "num_tokens": 28317327.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5491290092468262, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21602396028552, "rewards/wordcountpos_reward/raw_geo/std": 0.12123997475876024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1188.25, "completions/mean_terminated_length": 1084.3333740234375, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.1312262452490498, "frac_reward_zero_std": 0.0, "grad_norm": 3.4310007665360565, "kl": 0.0052032470703125, "learning_rate": 9.973667647328835e-07, "loss": 0.0166, "num_tokens": 28355931.0, "reward": 0.0, "reward_std": 1.0214509963989258, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.041002112446201305, "rewards/wordcountpos_reward/raw_geo/std": 0.10049333673600769, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.094182643679026, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1018.6875, "completions/mean_terminated_length": 986.6000366210938, "completions/min_length": 667.0, "completions/min_terminated_length": 667.0, "epoch": 0.1314262852570514, "frac_reward_zero_std": 0.0, "grad_norm": 3.5411403747541748, "kl": 0.00519561767578125, "learning_rate": 9.973327116132535e-07, "loss": -0.0389, "num_tokens": 28401726.0, "reward": 0.0, "reward_std": 0.863051176071167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04423193519333256, "rewards/wordcountpos_reward/raw_geo/std": 0.059488267363860876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1092.1875, "completions/mean_terminated_length": 1065.0, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.13162632526505302, "frac_reward_zero_std": 0.0, "grad_norm": 3.4776309441757878, "kl": 0.005279541015625, "learning_rate": 9.972984403721617e-07, "loss": 0.0415, "num_tokens": 28443193.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9779000282287598, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22567542424594958, "rewards/wordcountpos_reward/raw_geo/std": 0.122762869851685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869924, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1074.3125, "completions/mean_terminated_length": 1045.933349609375, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.1318263652730546, "frac_reward_zero_std": 0.0, "grad_norm": 2.469400012411404, "kl": 0.005725860595703125, "learning_rate": 9.972639510263196e-07, "loss": -0.0658, "num_tokens": 28481150.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9975144863128662, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14032742480850013, "rewards/wordcountpos_reward/raw_geo/std": 0.07303950883303385, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1270.625, "completions/mean_terminated_length": 1217.6923828125, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.13202640528105622, "frac_reward_zero_std": 0.0, "grad_norm": 3.234344237069856, "kl": 0.006439208984375, "learning_rate": 9.972292435925436e-07, "loss": -0.0107, "num_tokens": 28525928.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7132295370101929, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08332458437535507, "rewards/wordcountpos_reward/raw_geo/std": 0.2423254158475807, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1082.4375, "completions/mean_terminated_length": 1082.4375, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.1322264452890578, "frac_reward_zero_std": 0.0, "grad_norm": 3.1700540986054953, "kl": 0.00649261474609375, "learning_rate": 9.971943180877578e-07, "loss": -0.0196, "num_tokens": 28565135.0, "reward": 0.0, "reward_std": 0.9550644755363464, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.040341277385431855, "rewards/wordcountpos_reward/raw_geo/std": 0.0712034567734884, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087684, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1090.25, "completions/mean_terminated_length": 1090.25, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.13242648529705942, "frac_reward_zero_std": 0.0, "grad_norm": 3.50163149675052, "kl": 0.00691986083984375, "learning_rate": 9.97159174528992e-07, "loss": -0.0083, "num_tokens": 28614627.0, "reward": 0.0, "reward_std": 0.7552675604820251, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04568235324103673, "rewards/wordcountpos_reward/raw_geo/std": 0.11796028086435413, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 947.3125, "completions/mean_terminated_length": 910.4667358398438, "completions/min_length": 636.0, "completions/min_terminated_length": 636.0, "epoch": 0.132626525305061, "frac_reward_zero_std": 0.0, "grad_norm": 3.845323115483345, "kl": 0.0078277587890625, "learning_rate": 9.97123812933382e-07, "loss": -0.0435, "num_tokens": 28652640.0, "reward": 0.0, "reward_std": 1.0025032758712769, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23569810293731933, "rewards/wordcountpos_reward/raw_geo/std": 0.3384914477541847, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1192.4375, "completions/mean_terminated_length": 1171.933349609375, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.1328265653130626, "frac_reward_zero_std": 0.0, "grad_norm": 3.304372477741638, "kl": 0.00621795654296875, "learning_rate": 9.970882333181705e-07, "loss": 0.0087, "num_tokens": 28692511.0, "reward": -1.4901161193847656e-08, "reward_std": 1.048022747039795, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.006602699671060386, "rewards/wordcountpos_reward/raw_geo/std": 0.03777902085105849, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509009, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 991.0625, "completions/mean_terminated_length": 957.1333618164062, "completions/min_length": 611.0, "completions/min_terminated_length": 611.0, "epoch": 0.13302660532106422, "frac_reward_zero_std": 0.0, "grad_norm": 2.8131400706173237, "kl": 0.00263214111328125, "learning_rate": 9.970524357007062e-07, "loss": -0.011, "num_tokens": 28725016.0, "reward": -7.450580596923828e-09, "reward_std": 1.0291789770126343, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.05325052424207354, "rewards/wordcountpos_reward/raw_geo/std": 0.10540294845922886, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901858, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1371.1875, "completions/mean_terminated_length": 1328.25, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.1332266453290658, "frac_reward_zero_std": 0.0, "grad_norm": 2.9863509949236753, "kl": 0.00701904296875, "learning_rate": 9.970164200984443e-07, "loss": -0.0186, "num_tokens": 28775331.0, "reward": 0.0, "reward_std": 0.7168537378311157, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.240252370502287, "rewards/wordcountpos_reward/raw_geo/std": 0.1105960634530435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717426, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1063.375, "completions/mean_terminated_length": 1063.375, "completions/min_length": 734.0, "completions/min_terminated_length": 734.0, "epoch": 0.13342668533706742, "frac_reward_zero_std": 0.0, "grad_norm": 3.142214576501579, "kl": 0.0045623779296875, "learning_rate": 9.96980186528946e-07, "loss": 0.0055, "num_tokens": 28814041.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0318855047225952, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10171903810984985, "rewards/wordcountpos_reward/raw_geo/std": 0.16995287458764927, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460886, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1163.75, "completions/mean_terminated_length": 1115.71435546875, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.133626725345069, "frac_reward_zero_std": 0.0, "grad_norm": 3.253372187753335, "kl": 0.00621795654296875, "learning_rate": 9.969437350098792e-07, "loss": -0.0263, "num_tokens": 28863349.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0098556280136108, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05990797004300492, "rewards/wordcountpos_reward/raw_geo/std": 0.10188959470906758, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1006.6875, "completions/mean_terminated_length": 1006.6875, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.13382676535307061, "frac_reward_zero_std": 0.0, "grad_norm": 3.4764647965790667, "kl": 0.0055389404296875, "learning_rate": 9.969070655590176e-07, "loss": 0.0314, "num_tokens": 28906232.0, "reward": 0.0, "reward_std": 0.8855102062225342, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03819761022437591, "rewards/wordcountpos_reward/raw_geo/std": 0.15089505994649535, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1247.9375, "completions/mean_terminated_length": 1163.916748046875, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.13402680536107223, "frac_reward_zero_std": 0.0, "grad_norm": 2.4846989439490264, "kl": 0.0050048828125, "learning_rate": 9.968701781942416e-07, "loss": -0.1845, "num_tokens": 28955095.0, "reward": 0.0, "reward_std": 0.9443983435630798, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1520027507303361, "rewards/wordcountpos_reward/raw_geo/std": 0.1367502075335975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.19474579822405907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 980.6875, "completions/mean_terminated_length": 980.6875, "completions/min_length": 609.0, "completions/min_terminated_length": 609.0, "epoch": 0.1342268453690738, "frac_reward_zero_std": 0.0, "grad_norm": 3.268405289422698, "kl": 0.004718780517578125, "learning_rate": 9.968330729335373e-07, "loss": -0.0262, "num_tokens": 28991298.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9413201808929443, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 3.4723568840691957e-06, "rewards/wordcountpos_reward/raw_geo/std": 0.16894296312032356, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1355373393953503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1055.4375, "completions/mean_terminated_length": 1055.4375, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.13442688537707542, "frac_reward_zero_std": 0.0, "grad_norm": 3.614158830986014, "kl": 0.0110931396484375, "learning_rate": 9.967957497949977e-07, "loss": -0.0303, "num_tokens": 29041065.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8191667795181274, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23981766865371792, "rewards/wordcountpos_reward/raw_geo/std": 0.31159167452390013, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607214, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 1164.6875, "completions/mean_terminated_length": 1116.7857666015625, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.134626925385077, "frac_reward_zero_std": 0.0, "grad_norm": 3.476532545378162, "kl": 0.008636474609375, "learning_rate": 9.967582087968216e-07, "loss": -0.0427, "num_tokens": 29091876.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6427819728851318, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07378987834921148, "rewards/wordcountpos_reward/raw_geo/std": 0.1400941718133652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477443, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1280.375, "completions/mean_terminated_length": 1249.0, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.13482696539307862, "frac_reward_zero_std": 0.0, "grad_norm": 3.0970888606647975, "kl": 0.0067901611328125, "learning_rate": 9.967204499573144e-07, "loss": 0.015, "num_tokens": 29128946.0, "reward": 0.0, "reward_std": 0.7480720281600952, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21311493454456562, "rewards/wordcountpos_reward/raw_geo/std": 0.10351996953299572, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1207.3125, "completions/mean_terminated_length": 1187.800048828125, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.1350270054010802, "frac_reward_zero_std": 0.0, "grad_norm": 2.6745800353908464, "kl": 0.004638671875, "learning_rate": 9.96682473294887e-07, "loss": -0.0239, "num_tokens": 29172743.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9135780334472656, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12160674771147578, "rewards/wordcountpos_reward/raw_geo/std": 0.17578519741606502, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1144.375, "completions/mean_terminated_length": 1144.375, "completions/min_length": 997.0, "completions/min_terminated_length": 997.0, "epoch": 0.13522704540908181, "frac_reward_zero_std": 0.0, "grad_norm": 1.9476825347610727, "kl": 0.00518798828125, "learning_rate": 9.966442788280575e-07, "loss": 0.0118, "num_tokens": 29217605.0, "reward": 7.450580596923828e-09, "reward_std": 1.002336025238037, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16464423071540582, "rewards/wordcountpos_reward/raw_geo/std": 0.1532432998548804, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1188.5, "completions/mean_terminated_length": 1167.7333984375, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.13542708541708343, "frac_reward_zero_std": 0.0, "grad_norm": 3.3755287836118857, "kl": 0.0082550048828125, "learning_rate": 9.966058665754494e-07, "loss": -0.0668, "num_tokens": 29270741.0, "reward": 0.0, "reward_std": 0.6608616709709167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23340531596752148, "rewards/wordcountpos_reward/raw_geo/std": 0.14174178380742677, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 952.375, "completions/mean_terminated_length": 952.375, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.135627125425085, "frac_reward_zero_std": 0.0, "grad_norm": 2.7624594871829484, "kl": 0.004543304443359375, "learning_rate": 9.965672365557928e-07, "loss": 0.0228, "num_tokens": 29310939.0, "reward": 7.450580596923828e-09, "reward_std": 1.0148861408233643, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.0006055944986289026, "rewards/wordcountpos_reward/raw_geo/std": 0.03041083385154536, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1156.0, "completions/max_terminated_length": 1156.0, "completions/mean_length": 862.3125, "completions/mean_terminated_length": 862.3125, "completions/min_length": 681.0, "completions/min_terminated_length": 681.0, "epoch": 0.13582716543308662, "frac_reward_zero_std": 0.0, "grad_norm": 4.40865964743789, "kl": 0.0063629150390625, "learning_rate": 9.96528388787924e-07, "loss": 0.0238, "num_tokens": 29348792.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8102241158485413, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02602516324429506, "rewards/wordcountpos_reward/raw_geo/std": 0.1502081507064256, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1163.5, "completions/mean_terminated_length": 1141.0667724609375, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.1360272054410882, "frac_reward_zero_std": 0.0, "grad_norm": 2.4148042308996507, "kl": 0.0050048828125, "learning_rate": 9.964893232907847e-07, "loss": -0.0181, "num_tokens": 29390840.0, "reward": 0.0, "reward_std": 0.7928985357284546, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02477742068729108, "rewards/wordcountpos_reward/raw_geo/std": 0.07537619440262582, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.16549588783075214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 942.5, "completions/mean_terminated_length": 942.5, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.13622724544908982, "frac_reward_zero_std": 0.0, "grad_norm": 3.8849979245723825, "kl": 0.00829315185546875, "learning_rate": 9.964500400834242e-07, "loss": -0.041, "num_tokens": 29436464.0, "reward": 1.4901161193847656e-08, "reward_std": 1.043959379196167, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.22229496293426804, "rewards/wordcountpos_reward/raw_geo/std": 0.10262288181478094, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1086.0, "completions/max_terminated_length": 1086.0, "completions/mean_length": 978.0625, "completions/mean_terminated_length": 978.0625, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.13642728545709143, "frac_reward_zero_std": 0.0, "grad_norm": 2.308428565876076, "kl": 0.00312042236328125, "learning_rate": 9.964105391849968e-07, "loss": -0.0188, "num_tokens": 29469161.0, "reward": 0.0, "reward_std": 0.7602218389511108, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18210586500254414, "rewards/wordcountpos_reward/raw_geo/std": 0.18249962833621194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1043.5625, "completions/mean_terminated_length": 1043.5625, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.136627325465093, "frac_reward_zero_std": 0.0, "grad_norm": 3.347932976671946, "kl": 0.00540924072265625, "learning_rate": 9.963708206147635e-07, "loss": 0.0248, "num_tokens": 29519402.0, "reward": 0.0, "reward_std": 0.5571043491363525, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02263269429901924, "rewards/wordcountpos_reward/raw_geo/std": 0.030775433610970383, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1045.3125, "completions/mean_terminated_length": 1045.3125, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.13682736547309463, "frac_reward_zero_std": 0.0, "grad_norm": 3.3655345045803515, "kl": 0.00521087646484375, "learning_rate": 9.96330884392091e-07, "loss": 0.0309, "num_tokens": 29549455.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8928267955780029, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3070642784715, "rewards/wordcountpos_reward/raw_geo/std": 0.11910458814440902, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1008.8125, "completions/mean_terminated_length": 895.4615478515625, "completions/min_length": 466.0, "completions/min_terminated_length": 466.0, "epoch": 0.1370274054810962, "frac_reward_zero_std": 0.0, "grad_norm": 3.7113494184727935, "kl": 0.00677490234375, "learning_rate": 9.962907305364528e-07, "loss": -0.0274, "num_tokens": 29587364.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7454226016998291, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.26854109123445347, "rewards/wordcountpos_reward/raw_geo/std": 0.28636950916948584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15104573749303493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1108.0625, "completions/mean_terminated_length": 1108.0625, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.13722744548909782, "frac_reward_zero_std": 0.0, "grad_norm": 3.216933367268093, "kl": 0.00492095947265625, "learning_rate": 9.962503590674276e-07, "loss": -0.0155, "num_tokens": 29631389.0, "reward": 0.0, "reward_std": 0.8615149855613708, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.055286124124821374, "rewards/wordcountpos_reward/raw_geo/std": 0.10167580749230112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 940.3125, "completions/mean_terminated_length": 940.3125, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.13742748549709943, "frac_reward_zero_std": 0.0, "grad_norm": 3.243173977804894, "kl": 0.00577545166015625, "learning_rate": 9.962097700047008e-07, "loss": 0.0252, "num_tokens": 29680354.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7638654112815857, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.054423855935719107, "rewards/wordcountpos_reward/raw_geo/std": 0.17388371951199952, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1229.25, "completions/mean_terminated_length": 1211.2000732421875, "completions/min_length": 1035.0, "completions/min_terminated_length": 1035.0, "epoch": 0.13762752550510102, "frac_reward_zero_std": 0.0, "grad_norm": 2.2922082506539088, "kl": 0.003414154052734375, "learning_rate": 9.96168963368064e-07, "loss": -0.0195, "num_tokens": 29717958.0, "reward": -3.725290298461914e-09, "reward_std": 1.060276985168457, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16127617426569996, "rewards/wordcountpos_reward/raw_geo/std": 0.09716465677783157, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941137, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 1065.75, "completions/mean_terminated_length": 1036.800048828125, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.13782756551310263, "frac_reward_zero_std": 0.0, "grad_norm": 3.1986067944426546, "kl": 0.00455474853515625, "learning_rate": 9.96127939177415e-07, "loss": -0.0063, "num_tokens": 29751562.0, "reward": 0.0, "reward_std": 0.9840933084487915, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05837005965237116, "rewards/wordcountpos_reward/raw_geo/std": 0.2589641194830504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1225.0, "completions/max_terminated_length": 1225.0, "completions/mean_length": 1164.0, "completions/mean_terminated_length": 1164.0, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.1380276055211042, "frac_reward_zero_std": 0.0, "grad_norm": 2.518302959899328, "kl": 0.005924224853515625, "learning_rate": 9.960866974527567e-07, "loss": 0.0198, "num_tokens": 29792418.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7731319069862366, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.053263698647258016, "rewards/wordcountpos_reward/raw_geo/std": 0.0793153271995976, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390615, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1184.3125, "completions/mean_terminated_length": 1184.3125, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.13822764552910582, "frac_reward_zero_std": 0.0, "grad_norm": 2.40174455281427, "kl": 0.0048370361328125, "learning_rate": 9.960452382141992e-07, "loss": 0.0074, "num_tokens": 29840911.0, "reward": -5.960464477539063e-08, "reward_std": 0.9344772696495056, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05228336251761571, "rewards/wordcountpos_reward/raw_geo/std": 0.06650767942038709, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1221.0, "completions/mean_terminated_length": 1202.4000244140625, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.1384276855371074, "frac_reward_zero_std": 0.0, "grad_norm": 2.5302045187486986, "kl": 0.00519561767578125, "learning_rate": 9.960035614819581e-07, "loss": -0.0785, "num_tokens": 29883799.0, "reward": -5.960464477539063e-08, "reward_std": 0.7695052623748779, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0537022261445076, "rewards/wordcountpos_reward/raw_geo/std": 0.06477909293607671, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.17126976771553507, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1014.25, "completions/mean_terminated_length": 981.86669921875, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.13862772554510902, "frac_reward_zero_std": 0.0, "grad_norm": 3.0059344255770997, "kl": 0.0048980712890625, "learning_rate": 9.959616672763551e-07, "loss": 0.0266, "num_tokens": 29925883.0, "reward": 0.0, "reward_std": 0.8059113025665283, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16419694865627432, "rewards/wordcountpos_reward/raw_geo/std": 0.07599964126845102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1003.625, "completions/mean_terminated_length": 1003.625, "completions/min_length": 645.0, "completions/min_terminated_length": 645.0, "epoch": 0.13882776555311063, "frac_reward_zero_std": 0.0, "grad_norm": 3.5259171549541213, "kl": 0.00650787353515625, "learning_rate": 9.959195556178182e-07, "loss": -0.0015, "num_tokens": 29965309.0, "reward": -7.450580596923828e-09, "reward_std": 1.0141409635543823, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.13576232252922812, "rewards/wordcountpos_reward/raw_geo/std": 0.14926357078338248, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437975, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1151.5, "completions/mean_terminated_length": 1151.5, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.13902780556111222, "frac_reward_zero_std": 0.0, "grad_norm": 3.466923960735747, "kl": 0.00620269775390625, "learning_rate": 9.95877226526881e-07, "loss": -0.0206, "num_tokens": 30009213.0, "reward": 0.0, "reward_std": 0.9081611633300781, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0882971282508942, "rewards/wordcountpos_reward/raw_geo/std": 0.13655016171677775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033237, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1072.25, "completions/mean_terminated_length": 1043.7333984375, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.13922784556911383, "frac_reward_zero_std": 0.0, "grad_norm": 2.877632305498997, "kl": 0.00446319580078125, "learning_rate": 9.958346800241833e-07, "loss": -0.0789, "num_tokens": 30051697.0, "reward": -7.450580596923828e-09, "reward_std": 0.972619354724884, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.021725031389030623, "rewards/wordcountpos_reward/raw_geo/std": 0.03661588391024977, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1167.125, "completions/mean_terminated_length": 1119.571533203125, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.1394278855771154, "frac_reward_zero_std": 0.0, "grad_norm": 3.054419664841711, "kl": 0.0088653564453125, "learning_rate": 9.957919161304714e-07, "loss": -0.0369, "num_tokens": 30103739.0, "reward": 0.0, "reward_std": 1.0333055257797241, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0057409345208391735, "rewards/wordcountpos_reward/raw_geo/std": 0.10382577793180792, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901162, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1240.375, "completions/mean_terminated_length": 1223.0667724609375, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.13962792558511702, "frac_reward_zero_std": 0.0, "grad_norm": 3.292924062157794, "kl": 0.0104522705078125, "learning_rate": 9.957489348665968e-07, "loss": -0.0531, "num_tokens": 30155473.0, "reward": 0.0, "reward_std": 0.6561108827590942, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02371765219262611, "rewards/wordcountpos_reward/raw_geo/std": 0.11354867448102532, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978232, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1150.0, "completions/mean_terminated_length": 1150.0, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.13982796559311864, "frac_reward_zero_std": 0.0, "grad_norm": 2.6640346128681567, "kl": 0.004730224609375, "learning_rate": 9.957057362535175e-07, "loss": -0.0007, "num_tokens": 30201673.0, "reward": 0.0, "reward_std": 0.6640896797180176, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03628437041867817, "rewards/wordcountpos_reward/raw_geo/std": 0.09626891586953862, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1130.9375, "completions/mean_terminated_length": 1106.3333740234375, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.14002800560112022, "frac_reward_zero_std": 0.0, "grad_norm": 2.73126955211748, "kl": 0.0034275054931640625, "learning_rate": 9.956623203122972e-07, "loss": 0.0458, "num_tokens": 30252296.0, "reward": 0.0, "reward_std": 0.800506591796875, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12573467228988663, "rewards/wordcountpos_reward/raw_geo/std": 0.10527396540553197, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752094, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1103.75, "completions/mean_terminated_length": 1103.75, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.14022804560912183, "frac_reward_zero_std": 0.0, "grad_norm": 2.559370707789578, "kl": 0.00408172607421875, "learning_rate": 9.956186870641057e-07, "loss": -0.039, "num_tokens": 30290108.0, "reward": 0.0, "reward_std": 0.5168172121047974, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01391879818602983, "rewards/wordcountpos_reward/raw_geo/std": 0.07164518020495673, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1303.4375, "completions/mean_terminated_length": 1303.4375, "completions/min_length": 1070.0, "completions/min_terminated_length": 1070.0, "epoch": 0.14042808561712342, "frac_reward_zero_std": 0.0, "grad_norm": 2.7192980531774356, "kl": 0.005126953125, "learning_rate": 9.955748365302192e-07, "loss": 0.0117, "num_tokens": 30336259.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8233336210250854, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10136648307876828, "rewards/wordcountpos_reward/raw_geo/std": 0.15914360706719755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1040.5625, "completions/mean_terminated_length": 1009.9334106445312, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.14062812562512503, "frac_reward_zero_std": 0.0, "grad_norm": 3.9485772058384923, "kl": 0.007110595703125, "learning_rate": 9.955307687320188e-07, "loss": -0.0008, "num_tokens": 30379068.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8600001335144043, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11216864698780595, "rewards/wordcountpos_reward/raw_geo/std": 0.022922390218119638, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14707015206910487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 997.5, "completions/mean_terminated_length": 997.5, "completions/min_length": 720.0, "completions/min_terminated_length": 720.0, "epoch": 0.1408281656331266, "frac_reward_zero_std": 0.0, "grad_norm": 3.493676699900381, "kl": 0.0056915283203125, "learning_rate": 9.954864836909928e-07, "loss": -0.0479, "num_tokens": 30418876.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9076901078224182, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0071481929813572, "rewards/wordcountpos_reward/raw_geo/std": 0.22921296867951962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1142.625, "completions/mean_terminated_length": 1142.625, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.14102820564112822, "frac_reward_zero_std": 0.0, "grad_norm": 2.9639948982159057, "kl": 0.0070953369140625, "learning_rate": 9.954419814287342e-07, "loss": -0.0238, "num_tokens": 30467718.0, "reward": 0.0, "reward_std": 0.767283022403717, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17308119246840772, "rewards/wordcountpos_reward/raw_geo/std": 0.11223524603087588, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202954, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1158.5, "completions/mean_terminated_length": 1158.5, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.14122824564912984, "frac_reward_zero_std": 0.0, "grad_norm": 3.8843145288951173, "kl": 0.00765228271484375, "learning_rate": 9.953972619669427e-07, "loss": -0.0442, "num_tokens": 30516190.0, "reward": 0.0, "reward_std": 0.9820117354393005, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02992030303461632, "rewards/wordcountpos_reward/raw_geo/std": 0.1281319358892255, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1130.625, "completions/mean_terminated_length": 1130.625, "completions/min_length": 989.0, "completions/min_terminated_length": 989.0, "epoch": 0.14142828565713142, "frac_reward_zero_std": 0.0, "grad_norm": 3.238582098036172, "kl": 0.00623321533203125, "learning_rate": 9.953523253274238e-07, "loss": -0.0129, "num_tokens": 30559456.0, "reward": 5.960464477539063e-08, "reward_std": 0.7889923453330994, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00939426314301261, "rewards/wordcountpos_reward/raw_geo/std": 0.09751999661939352, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1350.4375, "completions/mean_terminated_length": 1200.875, "completions/min_length": 1089.0, "completions/min_terminated_length": 1089.0, "epoch": 0.14162832566513303, "frac_reward_zero_std": 0.0, "grad_norm": 1.978293402370285, "kl": 0.0026874542236328125, "learning_rate": 9.953071715320888e-07, "loss": -0.0039, "num_tokens": 30610111.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0519108772277832, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1741235838635124, "rewards/wordcountpos_reward/raw_geo/std": 0.04403836839294878, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1219.75, "completions/mean_terminated_length": 1179.71435546875, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.14182836567313462, "frac_reward_zero_std": 0.0, "grad_norm": 3.333057252728575, "kl": 0.00664520263671875, "learning_rate": 9.952618006029548e-07, "loss": 0.0278, "num_tokens": 30650387.0, "reward": 0.0, "reward_std": 0.5008547306060791, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14025724785888286, "rewards/wordcountpos_reward/raw_geo/std": 0.08485098297679669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1403039029577766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 1102.1875, "completions/mean_terminated_length": 1102.1875, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.14202840568113623, "frac_reward_zero_std": 0.0, "grad_norm": 2.6774147607652132, "kl": 0.0044403076171875, "learning_rate": 9.95216212562145e-07, "loss": 0.0082, "num_tokens": 30698294.0, "reward": 5.960464477539063e-08, "reward_std": 0.6835624575614929, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06311745003414003, "rewards/wordcountpos_reward/raw_geo/std": 0.13948938680928943, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13333333333333336, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1092.0625, "completions/mean_terminated_length": 906.6364135742188, "completions/min_length": 650.0, "completions/min_terminated_length": 650.0, "epoch": 0.14222844568913784, "frac_reward_zero_std": 0.0, "grad_norm": 3.0546089944500445, "kl": 0.00469207763671875, "learning_rate": 9.951704074318883e-07, "loss": -0.0249, "num_tokens": 30744439.0, "reward": -2.9802322387695312e-08, "reward_std": 0.628603458404541, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10739659977576964, "rewards/wordcountpos_reward/raw_geo/std": 0.07779821935513596, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1170.0, "completions/mean_terminated_length": 1148.0, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.14242848569713942, "frac_reward_zero_std": 0.0, "grad_norm": 3.862725587204926, "kl": 0.0083770751953125, "learning_rate": 9.951243852345196e-07, "loss": 0.0181, "num_tokens": 30793999.0, "reward": 7.450580596923828e-09, "reward_std": 1.0435220003128052, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.35213315369770076, "rewards/wordcountpos_reward/raw_geo/std": 0.05763721943742654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1067187372905475, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1393.3125, "completions/mean_terminated_length": 1329.300048828125, "completions/min_length": 1219.0, "completions/min_terminated_length": 1219.0, "epoch": 0.14262852570514103, "frac_reward_zero_std": 0.0, "grad_norm": 3.2670437030095085, "kl": 0.00617218017578125, "learning_rate": 9.95078145992479e-07, "loss": -0.0166, "num_tokens": 30842468.0, "reward": 3.725290298461914e-09, "reward_std": 1.0632672309875488, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07208442551012503, "rewards/wordcountpos_reward/raw_geo/std": 0.2627307507639042, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1206.5, "completions/mean_terminated_length": 1206.5, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.14282856571314262, "frac_reward_zero_std": 0.0, "grad_norm": 2.9241609126731363, "kl": 0.005245208740234375, "learning_rate": 9.950316897283137e-07, "loss": 0.0034, "num_tokens": 30888204.0, "reward": 0.0, "reward_std": 0.7585967183113098, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053196519062599604, "rewards/wordcountpos_reward/raw_geo/std": 0.12888531258144625, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13109227736669002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1091.3125, "completions/mean_terminated_length": 1091.3125, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.14302860572114423, "frac_reward_zero_std": 0.0, "grad_norm": 2.771106836625132, "kl": 0.0041351318359375, "learning_rate": 9.949850164646756e-07, "loss": 0.0285, "num_tokens": 30934737.0, "reward": 0.0, "reward_std": 0.8382663726806641, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14603299511443718, "rewards/wordcountpos_reward/raw_geo/std": 0.11244707488262339, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1047.8125, "completions/mean_terminated_length": 1017.666748046875, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.14322864572914584, "frac_reward_zero_std": 0.0, "grad_norm": 3.426852639639691, "kl": 0.0054168701171875, "learning_rate": 9.949381262243225e-07, "loss": 0.0132, "num_tokens": 30976606.0, "reward": 5.960464477539063e-08, "reward_std": 0.5217741131782532, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16936015769452664, "rewards/wordcountpos_reward/raw_geo/std": 0.185387502350504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1139.0, "completions/mean_length": 983.25, "completions/mean_terminated_length": 948.800048828125, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.14342868573714743, "frac_reward_zero_std": 0.0, "grad_norm": 3.885401042416404, "kl": 0.00608062744140625, "learning_rate": 9.94891019030119e-07, "loss": 0.0353, "num_tokens": 31013138.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9144117832183838, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03453104921911609, "rewards/wordcountpos_reward/raw_geo/std": 0.17395946248572433, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1201.4375, "completions/mean_terminated_length": 1181.533447265625, "completions/min_length": 809.0, "completions/min_terminated_length": 809.0, "epoch": 0.14362872574514904, "frac_reward_zero_std": 0.0, "grad_norm": 2.5575164662328134, "kl": 0.00366973876953125, "learning_rate": 9.948436949050343e-07, "loss": -0.0143, "num_tokens": 31059481.0, "reward": 1.862645149230957e-08, "reward_std": 1.0672872066497803, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01463547907012421, "rewards/wordcountpos_reward/raw_geo/std": 0.07692061541498454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1062.125, "completions/mean_terminated_length": 1062.125, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.14382876575315062, "frac_reward_zero_std": 0.0, "grad_norm": 2.654257872990229, "kl": 0.0034637451171875, "learning_rate": 9.94796153872144e-07, "loss": 0.0295, "num_tokens": 31109275.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9103240370750427, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05953108948483753, "rewards/wordcountpos_reward/raw_geo/std": 0.06727902500973598, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1058.5625, "completions/mean_terminated_length": 1029.1334228515625, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.14402880576115223, "frac_reward_zero_std": 0.0, "grad_norm": 3.5922169500272374, "kl": 0.007232666015625, "learning_rate": 9.947483959546293e-07, "loss": 0.0068, "num_tokens": 31150284.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9292308688163757, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09370068959943902, "rewards/wordcountpos_reward/raw_geo/std": 0.13843699606577375, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1044.4375, "completions/mean_terminated_length": 1044.4375, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.14422884576915382, "frac_reward_zero_std": 0.0, "grad_norm": 2.9323599712703636, "kl": 0.00423431396484375, "learning_rate": 9.94700421175777e-07, "loss": -0.0123, "num_tokens": 31195763.0, "reward": 0.0, "reward_std": 0.49132829904556274, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17546977703420125, "rewards/wordcountpos_reward/raw_geo/std": 0.41867445773614, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13924399049470285, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1312.0, "completions/mean_terminated_length": 1268.615478515625, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.14442888577715543, "frac_reward_zero_std": 0.0, "grad_norm": 3.317288321785738, "kl": 0.0067138671875, "learning_rate": 9.946522295589801e-07, "loss": 0.0305, "num_tokens": 31244731.0, "reward": 0.0, "reward_std": 0.5410593748092651, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2706143842692229, "rewards/wordcountpos_reward/raw_geo/std": 0.2736734719163004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725114, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 880.4375, "completions/mean_terminated_length": 880.4375, "completions/min_length": 610.0, "completions/min_terminated_length": 610.0, "epoch": 0.14462892578515704, "frac_reward_zero_std": 0.0, "grad_norm": 3.5854368586298704, "kl": 0.0055999755859375, "learning_rate": 9.94603821127737e-07, "loss": -0.0311, "num_tokens": 31279458.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9416681528091431, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1467374778123789, "rewards/wordcountpos_reward/raw_geo/std": 0.05100519292653718, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 921.3125, "completions/mean_terminated_length": 921.3125, "completions/min_length": 658.0, "completions/min_terminated_length": 658.0, "epoch": 0.14482896579315863, "frac_reward_zero_std": 0.0, "grad_norm": 3.503033907202783, "kl": 0.005615234375, "learning_rate": 9.945551959056518e-07, "loss": -0.0035, "num_tokens": 31329031.0, "reward": 0.0, "reward_std": 0.7892371416091919, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10875541293612151, "rewards/wordcountpos_reward/raw_geo/std": 0.08962381679720997, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.0910840068085298, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1255.0625, "completions/mean_terminated_length": 1108.0999755859375, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.14502900580116024, "frac_reward_zero_std": 0.0, "grad_norm": 3.1389930286765964, "kl": 0.00562286376953125, "learning_rate": 9.945063539164344e-07, "loss": 0.0488, "num_tokens": 31385048.0, "reward": -1.4901161193847656e-08, "reward_std": 1.040879487991333, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07732004099190525, "rewards/wordcountpos_reward/raw_geo/std": 0.060054882866430806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316813, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1234.5625, "completions/mean_terminated_length": 1216.86669921875, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.14522904580916182, "frac_reward_zero_std": 0.0, "grad_norm": 3.1695657531737464, "kl": 0.006683349609375, "learning_rate": 9.944572951839003e-07, "loss": -0.0612, "num_tokens": 31433281.0, "reward": 0.0, "reward_std": 0.7444674968719482, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0407388972403369, "rewards/wordcountpos_reward/raw_geo/std": 0.05057335402229615, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1189.0, "completions/mean_terminated_length": 1117.2308349609375, "completions/min_length": 663.0, "completions/min_terminated_length": 663.0, "epoch": 0.14542908581716343, "frac_reward_zero_std": 0.0, "grad_norm": 2.79028113516693, "kl": 0.00530242919921875, "learning_rate": 9.94408019731971e-07, "loss": -0.0603, "num_tokens": 31487097.0, "reward": 0.0, "reward_std": 0.8400046825408936, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1559528611625961, "rewards/wordcountpos_reward/raw_geo/std": 0.15576773285069773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1259.25, "completions/mean_terminated_length": 1243.2000732421875, "completions/min_length": 1003.0, "completions/min_terminated_length": 1003.0, "epoch": 0.14562912582516505, "frac_reward_zero_std": 0.0, "grad_norm": 3.5740344672616278, "kl": 0.0075836181640625, "learning_rate": 9.94358527584673e-07, "loss": -0.0366, "num_tokens": 31530533.0, "reward": 0.0, "reward_std": 0.7851624488830566, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025958064734766265, "rewards/wordcountpos_reward/raw_geo/std": 0.2049492670871542, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10809803506625451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1224.0625, "completions/mean_terminated_length": 1009.4444580078125, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.14582916583316663, "frac_reward_zero_std": 0.0, "grad_norm": 2.9136454977175377, "kl": 0.0062255859375, "learning_rate": 9.943088187661394e-07, "loss": 0.0031, "num_tokens": 31579678.0, "reward": 0.0, "reward_std": 0.7163681387901306, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2224011053626009, "rewards/wordcountpos_reward/raw_geo/std": 0.6999590374254487, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1042.8125, "completions/mean_terminated_length": 1042.8125, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.14602920584116824, "frac_reward_zero_std": 0.0, "grad_norm": 3.3735998435710277, "kl": 0.00632476806640625, "learning_rate": 9.94258893300608e-07, "loss": -0.0878, "num_tokens": 31620995.0, "reward": -1.862645149230957e-09, "reward_std": 0.9324001669883728, "rewards/wordcountpos_reward/mean": -1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.008924512394700927, "rewards/wordcountpos_reward/raw_geo/std": 0.022503535407724942, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1184.875, "completions/mean_terminated_length": 1163.86669921875, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.14622924584916983, "frac_reward_zero_std": 0.0, "grad_norm": 3.3061452594883525, "kl": 0.0060272216796875, "learning_rate": 9.942087512124232e-07, "loss": -0.0084, "num_tokens": 31658377.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5267903804779053, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.025016088143917627, "rewards/wordcountpos_reward/raw_geo/std": 0.10311097625767426, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1080.3125, "completions/mean_terminated_length": 1080.3125, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.14642928585717144, "frac_reward_zero_std": 0.0, "grad_norm": 3.4756112170773035, "kl": 0.0081024169921875, "learning_rate": 9.94158392526034e-07, "loss": 0.0011, "num_tokens": 31691022.0, "reward": 1.4901161193847656e-08, "reward_std": 0.93564772605896, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013113661731094118, "rewards/wordcountpos_reward/raw_geo/std": 0.029943185327959396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0596284793999944, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 922.3125, "completions/mean_terminated_length": 922.3125, "completions/min_length": 622.0, "completions/min_terminated_length": 622.0, "epoch": 0.14662932586517302, "frac_reward_zero_std": 0.0, "grad_norm": 2.9405757475850285, "kl": 0.00597381591796875, "learning_rate": 9.941078172659955e-07, "loss": 0.0108, "num_tokens": 31728115.0, "reward": 0.0, "reward_std": 0.7895586490631104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05735508340614822, "rewards/wordcountpos_reward/raw_geo/std": 0.21383923615787387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 1044.125, "completions/mean_terminated_length": 1044.125, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.14682936587317463, "frac_reward_zero_std": 0.0, "grad_norm": 3.221447547916469, "kl": 0.005321502685546875, "learning_rate": 9.94057025456969e-07, "loss": -0.004, "num_tokens": 31764693.0, "reward": 0.0, "reward_std": 1.0384291410446167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027346554022988852, "rewards/wordcountpos_reward/raw_geo/std": 0.05012748370006775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1293.625, "completions/mean_terminated_length": 1169.800048828125, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.14702940588117624, "frac_reward_zero_std": 0.0, "grad_norm": 2.565880363332157, "kl": 0.004779815673828125, "learning_rate": 9.940060171237204e-07, "loss": 0.0044, "num_tokens": 31810935.0, "reward": 0.0, "reward_std": 0.9921772480010986, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08258514967635046, "rewards/wordcountpos_reward/raw_geo/std": 0.14775844679852268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.23090001042619038, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 915.3125, "completions/mean_terminated_length": 915.3125, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.14722944588917783, "frac_reward_zero_std": 0.0, "grad_norm": 3.034484580566022, "kl": 0.00516510009765625, "learning_rate": 9.939547922911215e-07, "loss": -0.042, "num_tokens": 31853932.0, "reward": -1.862645149230957e-08, "reward_std": 1.0283491611480713, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010920550721835287, "rewards/wordcountpos_reward/raw_geo/std": 0.11000018238737036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1132.0, "completions/max_terminated_length": 1132.0, "completions/mean_length": 981.4375, "completions/mean_terminated_length": 981.4375, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.14742948589717944, "frac_reward_zero_std": 0.0, "grad_norm": 3.1664390033328695, "kl": 0.005584716796875, "learning_rate": 9.9390335098415e-07, "loss": 0.0094, "num_tokens": 31895347.0, "reward": -7.450580596923828e-09, "reward_std": 1.0350008010864258, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.16003932504552604, "rewards/wordcountpos_reward/raw_geo/std": 0.06289044526740273, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.11729986896522632, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1020.75, "completions/mean_terminated_length": 1020.75, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.14762952590518102, "frac_reward_zero_std": 0.0, "grad_norm": 2.4456819284001785, "kl": 0.0038013458251953125, "learning_rate": 9.938516932278888e-07, "loss": -0.0242, "num_tokens": 31930247.0, "reward": 0.0, "reward_std": 0.6868234872817993, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12892066146394085, "rewards/wordcountpos_reward/raw_geo/std": 0.17193502673549263, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1126.0, "completions/mean_terminated_length": 1072.571533203125, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.14782956591318264, "frac_reward_zero_std": 0.0, "grad_norm": 1.9059478960136789, "kl": 0.0029144287109375, "learning_rate": 9.937998190475266e-07, "loss": -0.0446, "num_tokens": 31966103.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0434917211532593, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03284074013793732, "rewards/wordcountpos_reward/raw_geo/std": 0.0948683697455548, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1079.9375, "completions/mean_terminated_length": 983.0000610351562, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.14802960592118425, "frac_reward_zero_std": 0.0, "grad_norm": 3.245806013307816, "kl": 0.00577545166015625, "learning_rate": 9.937477284683574e-07, "loss": 0.0028, "num_tokens": 32013542.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4754641652107239, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1492716070061221, "rewards/wordcountpos_reward/raw_geo/std": 0.11078340341472707, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1133.625, "completions/mean_terminated_length": 1133.625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.14822964592918583, "frac_reward_zero_std": 0.0, "grad_norm": 2.7144974746630073, "kl": 0.0039825439453125, "learning_rate": 9.936954215157807e-07, "loss": -0.0454, "num_tokens": 32043712.0, "reward": 0.0, "reward_std": 0.8612264394760132, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20620909880804195, "rewards/wordcountpos_reward/raw_geo/std": 0.04436061698883284, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1018.1875, "completions/mean_terminated_length": 1018.1875, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.14842968593718744, "frac_reward_zero_std": 0.0, "grad_norm": 3.6877184711393194, "kl": 0.0062713623046875, "learning_rate": 9.936428982153017e-07, "loss": 0.0018, "num_tokens": 32071235.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8817063570022583, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02431519355908192, "rewards/wordcountpos_reward/raw_geo/std": 0.04731358555872976, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1156.8125, "completions/mean_terminated_length": 1156.8125, "completions/min_length": 855.0, "completions/min_terminated_length": 855.0, "epoch": 0.14862972594518903, "frac_reward_zero_std": 0.0, "grad_norm": 2.727083189462933, "kl": 0.0050811767578125, "learning_rate": 9.935901585925309e-07, "loss": -0.002, "num_tokens": 32115200.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9644925594329834, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022266794489776795, "rewards/wordcountpos_reward/raw_geo/std": 0.2399209765150163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.16141733350404336, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1272.375, "completions/mean_terminated_length": 1095.3333740234375, "completions/min_length": 765.0, "completions/min_terminated_length": 765.0, "epoch": 0.14882976595319064, "frac_reward_zero_std": 0.0, "grad_norm": 3.2697999810790837, "kl": 0.006439208984375, "learning_rate": 9.935372026731847e-07, "loss": -0.0733, "num_tokens": 32170958.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9885945320129395, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09825797115494561, "rewards/wordcountpos_reward/raw_geo/std": 0.09147345310798999, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1125.625, "completions/mean_terminated_length": 1125.625, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.14902980596119225, "frac_reward_zero_std": 0.0, "grad_norm": 2.702942452417753, "kl": 0.00390625, "learning_rate": 9.934840304830843e-07, "loss": 0.0248, "num_tokens": 32217168.0, "reward": 5.960464477539063e-08, "reward_std": 0.4093765616416931, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05871235552625615, "rewards/wordcountpos_reward/raw_geo/std": 0.13916236591237247, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1203.1875, "completions/mean_terminated_length": 1183.4000244140625, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.14922984596919384, "frac_reward_zero_std": 0.0, "grad_norm": 3.080933903956693, "kl": 0.007293701171875, "learning_rate": 9.934306420481567e-07, "loss": 0.0034, "num_tokens": 32261027.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9661141037940979, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009531510017604446, "rewards/wordcountpos_reward/raw_geo/std": 0.140438020058125, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.17191729277636836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1203.625, "completions/mean_terminated_length": 1161.2857666015625, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.14942988597719545, "frac_reward_zero_std": 0.0, "grad_norm": 3.220097252933624, "kl": 0.0067596435546875, "learning_rate": 9.933770373944344e-07, "loss": 0.0159, "num_tokens": 32314493.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9547731876373291, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18587379606769372, "rewards/wordcountpos_reward/raw_geo/std": 0.3465018471227371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087683, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1173.75, "completions/mean_terminated_length": 1127.1429443359375, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.14962992598519703, "frac_reward_zero_std": 0.0, "grad_norm": 3.391438531266516, "kl": 0.0070648193359375, "learning_rate": 9.933232165480555e-07, "loss": -0.007, "num_tokens": 32358361.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9440287351608276, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17168122591343074, "rewards/wordcountpos_reward/raw_geo/std": 0.18252298291517974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12816366850994054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1162.25, "completions/mean_terminated_length": 1162.25, "completions/min_length": 997.0, "completions/min_terminated_length": 997.0, "epoch": 0.14982996599319864, "frac_reward_zero_std": 0.0, "grad_norm": 2.4449131856697055, "kl": 0.00525665283203125, "learning_rate": 9.932691795352632e-07, "loss": -0.0383, "num_tokens": 32401717.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8779486417770386, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16634102989475238, "rewards/wordcountpos_reward/raw_geo/std": 0.11342233354185205, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 993.375, "completions/mean_terminated_length": 993.375, "completions/min_length": 661.0, "completions/min_terminated_length": 661.0, "epoch": 0.15003000600120023, "frac_reward_zero_std": 0.0, "grad_norm": 2.466370035448869, "kl": 0.0038604736328125, "learning_rate": 9.93214926382406e-07, "loss": -0.0407, "num_tokens": 32454363.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8034205436706543, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009196259360216556, "rewards/wordcountpos_reward/raw_geo/std": 0.08845259004288432, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125754, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1149.0625, "completions/mean_terminated_length": 1149.0625, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.15023004600920184, "frac_reward_zero_std": 0.0, "grad_norm": 2.8897311396365217, "kl": 0.00701141357421875, "learning_rate": 9.931604571159382e-07, "loss": 0.0268, "num_tokens": 32501916.0, "reward": 7.450580596923828e-09, "reward_std": 0.9287494421005249, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.021950882587778955, "rewards/wordcountpos_reward/raw_geo/std": 0.10769324891808538, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1098.0, "completions/mean_length": 973.5625, "completions/mean_terminated_length": 898.357177734375, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.15043008601720345, "frac_reward_zero_std": 0.0, "grad_norm": 3.046997666110846, "kl": 0.00571441650390625, "learning_rate": 9.931057717624192e-07, "loss": -0.0049, "num_tokens": 32547909.0, "reward": 0.0, "reward_std": 0.32828089594841003, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13448202488360017, "rewards/wordcountpos_reward/raw_geo/std": 0.07732084908212608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10809803506625451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1095.3125, "completions/mean_terminated_length": 1095.3125, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.15063012602520504, "frac_reward_zero_std": 0.0, "grad_norm": 3.067917425095031, "kl": 0.0070037841796875, "learning_rate": 9.930508703485136e-07, "loss": 0.0391, "num_tokens": 32590770.0, "reward": 0.0, "reward_std": 0.8541622161865234, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011933431246930814, "rewards/wordcountpos_reward/raw_geo/std": 0.08689841186406, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437975, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1088.3125, "completions/mean_terminated_length": 1060.86669921875, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.15083016603320665, "frac_reward_zero_std": 0.0, "grad_norm": 3.068528995159541, "kl": 0.006439208984375, "learning_rate": 9.929957529009918e-07, "loss": 0.0306, "num_tokens": 32628855.0, "reward": 2.9802322387695312e-08, "reward_std": 0.38358789682388306, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.043046412045031665, "rewards/wordcountpos_reward/raw_geo/std": 0.25446104557176796, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0825518916489187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1150.0625, "completions/mean_terminated_length": 1126.7333984375, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.15103020604120823, "frac_reward_zero_std": 0.0, "grad_norm": 3.2115053114292307, "kl": 0.0078125, "learning_rate": 9.929404194467294e-07, "loss": -0.0406, "num_tokens": 32669312.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7136757969856262, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1089064821287911, "rewards/wordcountpos_reward/raw_geo/std": 0.29028223127328384, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1201.0, "completions/mean_terminated_length": 1201.0, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.15123024604920984, "frac_reward_zero_std": 0.0, "grad_norm": 3.0151202960823644, "kl": 0.0086212158203125, "learning_rate": 9.92884870012707e-07, "loss": -0.0313, "num_tokens": 32715840.0, "reward": 0.0, "reward_std": 0.7436599731445312, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.5061100551384478, "rewards/wordcountpos_reward/raw_geo/std": 0.41022376086854195, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1256.0, "completions/max_terminated_length": 1256.0, "completions/mean_length": 961.1875, "completions/mean_terminated_length": 961.1875, "completions/min_length": 649.0, "completions/min_terminated_length": 649.0, "epoch": 0.15143028605721146, "frac_reward_zero_std": 0.0, "grad_norm": 3.2990875132831365, "kl": 0.005046844482421875, "learning_rate": 9.92829104626011e-07, "loss": 0.005, "num_tokens": 32748427.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9326552152633667, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.046124962930485486, "rewards/wordcountpos_reward/raw_geo/std": 0.05156686921410211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1076.0, "completions/max_terminated_length": 1076.0, "completions/mean_length": 882.75, "completions/mean_terminated_length": 882.75, "completions/min_length": 604.0, "completions/min_terminated_length": 604.0, "epoch": 0.15163032606521304, "frac_reward_zero_std": 0.0, "grad_norm": 2.9441380322509407, "kl": 0.00485992431640625, "learning_rate": 9.927731233138326e-07, "loss": -0.026, "num_tokens": 32785207.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0674519538879395, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -3.161302955743527e-05, "rewards/wordcountpos_reward/raw_geo/std": 0.062072532673726956, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869924, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1212.1875, "completions/mean_terminated_length": 1193.0001220703125, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.15183036607321465, "frac_reward_zero_std": 0.0, "grad_norm": 2.992312458461508, "kl": 0.007843017578125, "learning_rate": 9.927169261034687e-07, "loss": 0.0049, "num_tokens": 32831642.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0592217445373535, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06948679730555916, "rewards/wordcountpos_reward/raw_geo/std": 0.05500610085768121, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1141.5625, "completions/mean_terminated_length": 1117.666748046875, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.15203040608121624, "frac_reward_zero_std": 0.0, "grad_norm": 3.7420053574943477, "kl": 0.00701141357421875, "learning_rate": 9.926605130223215e-07, "loss": 0.0009, "num_tokens": 32886659.0, "reward": -5.960464477539063e-08, "reward_std": 0.7499135732650757, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21018875023133704, "rewards/wordcountpos_reward/raw_geo/std": 0.16591610409029345, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172842, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1093.0, "completions/mean_length": 898.9375, "completions/mean_terminated_length": 858.86669921875, "completions/min_length": 609.0, "completions/min_terminated_length": 609.0, "epoch": 0.15223044608921785, "frac_reward_zero_std": 0.0, "grad_norm": 3.9902208007971676, "kl": 0.0074005126953125, "learning_rate": 9.926038840978979e-07, "loss": -0.0374, "num_tokens": 32935474.0, "reward": 0.0, "reward_std": 0.7089831829071045, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02938673985528205, "rewards/wordcountpos_reward/raw_geo/std": 0.17560534291750896, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1203.5625, "completions/mean_terminated_length": 1203.5625, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.15243048609721943, "frac_reward_zero_std": 0.0, "grad_norm": 3.156381242634378, "kl": 0.0059356689453125, "learning_rate": 9.925470393578105e-07, "loss": -0.0071, "num_tokens": 32987083.0, "reward": 0.0, "reward_std": 0.5327888131141663, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12694652656832794, "rewards/wordcountpos_reward/raw_geo/std": 0.07866495400936042, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1229.0, "completions/mean_terminated_length": 1138.666748046875, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.15263052610522104, "frac_reward_zero_std": 0.0, "grad_norm": 3.343199628347586, "kl": 0.008636474609375, "learning_rate": 9.924899788297773e-07, "loss": -0.0287, "num_tokens": 33036267.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7331575155258179, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13364927828035372, "rewards/wordcountpos_reward/raw_geo/std": 0.11826819552272544, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043481, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 972.375, "completions/mean_terminated_length": 972.375, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.15283056611322265, "frac_reward_zero_std": 0.0, "grad_norm": 3.9499213157672792, "kl": 0.0081939697265625, "learning_rate": 9.924327025416213e-07, "loss": -0.0443, "num_tokens": 33073721.0, "reward": 2.2351741790771484e-08, "reward_std": 1.05270254611969, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.001051356776890749, "rewards/wordcountpos_reward/raw_geo/std": 0.13097985987539415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1134476547592341, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1110.875, "completions/mean_terminated_length": 1110.875, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.15303060612122424, "frac_reward_zero_std": 0.0, "grad_norm": 2.1374146190028673, "kl": 0.00360870361328125, "learning_rate": 9.9237521052127e-07, "loss": -0.0295, "num_tokens": 33115543.0, "reward": 5.960464477539063e-08, "reward_std": 0.7502629160881042, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03101903696506245, "rewards/wordcountpos_reward/raw_geo/std": 0.1335707904399487, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 1011.375, "completions/mean_terminated_length": 1011.375, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.15323064612922585, "frac_reward_zero_std": 0.0, "grad_norm": 3.605569915819742, "kl": 0.00687408447265625, "learning_rate": 9.923175027967577e-07, "loss": -0.0252, "num_tokens": 33147101.0, "reward": 0.0, "reward_std": 0.6953427791595459, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03576088829341519, "rewards/wordcountpos_reward/raw_geo/std": 0.10368129152337263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 1100.9375, "completions/mean_terminated_length": 1074.3333740234375, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.15343068613722743, "frac_reward_zero_std": 0.0, "grad_norm": 3.467804417765987, "kl": 0.011077880859375, "learning_rate": 9.922595793962223e-07, "loss": -0.0183, "num_tokens": 33193932.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9323349595069885, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0400680178423756, "rewards/wordcountpos_reward/raw_geo/std": 0.16127185716275536, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1036.0625, "completions/mean_terminated_length": 1005.1333618164062, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.15363072614522905, "frac_reward_zero_std": 0.0, "grad_norm": 3.334475969936804, "kl": 0.0064849853515625, "learning_rate": 9.92201440347908e-07, "loss": -0.009, "num_tokens": 33233789.0, "reward": 0.0, "reward_std": 0.9955896139144897, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.047388124107884795, "rewards/wordcountpos_reward/raw_geo/std": 0.05931634592262268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1181.0, "completions/mean_terminated_length": 1181.0, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.15383076615323066, "frac_reward_zero_std": 0.0, "grad_norm": 3.0008269550301305, "kl": 0.005889892578125, "learning_rate": 9.921430856801631e-07, "loss": -0.0288, "num_tokens": 33278989.0, "reward": 0.0, "reward_std": 0.9743967056274414, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.024752236001593122, "rewards/wordcountpos_reward/raw_geo/std": 0.0472160239814566, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10602235962635781, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1167.9375, "completions/mean_terminated_length": 1120.5, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.15403080616123224, "frac_reward_zero_std": 0.0, "grad_norm": 3.34023770231757, "kl": 0.00862884521484375, "learning_rate": 9.92084515421442e-07, "loss": 0.0008, "num_tokens": 33329788.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0200579166412354, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022090468620292347, "rewards/wordcountpos_reward/raw_geo/std": 0.04407479170293743, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1116.0, "completions/max_terminated_length": 1116.0, "completions/mean_length": 887.1875, "completions/mean_terminated_length": 887.1875, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.15423084616923385, "frac_reward_zero_std": 0.0, "grad_norm": 4.084139581071052, "kl": 0.010040283203125, "learning_rate": 9.920257296003035e-07, "loss": -0.0259, "num_tokens": 33357175.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8843921422958374, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09800201277649528, "rewards/wordcountpos_reward/raw_geo/std": 0.08875685994311198, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1132.75, "completions/mean_terminated_length": 1108.2667236328125, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.15443088617723544, "frac_reward_zero_std": 0.0, "grad_norm": 3.2775307182010196, "kl": 0.0063018798828125, "learning_rate": 9.919667282454123e-07, "loss": 0.0169, "num_tokens": 33401019.0, "reward": -2.9802322387695312e-08, "reward_std": 0.48580998182296753, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010818859185801187, "rewards/wordcountpos_reward/raw_geo/std": 0.13812552676118908, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1250.25, "completions/mean_terminated_length": 1250.25, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.15463092618523705, "frac_reward_zero_std": 0.0, "grad_norm": 3.3491310414750557, "kl": 0.0070953369140625, "learning_rate": 9.919075113855374e-07, "loss": 0.0289, "num_tokens": 33439639.0, "reward": 0.0, "reward_std": 0.6744941473007202, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0828370915743899, "rewards/wordcountpos_reward/raw_geo/std": 0.060435216074655906, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1100.9375, "completions/mean_terminated_length": 1100.9375, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.15483096619323863, "frac_reward_zero_std": 0.0, "grad_norm": 2.7543085857557426, "kl": 0.005008697509765625, "learning_rate": 9.918480790495533e-07, "loss": -0.0211, "num_tokens": 33480782.0, "reward": 2.9802322387695312e-08, "reward_std": 0.703142523765564, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18140903927184088, "rewards/wordcountpos_reward/raw_geo/std": 0.20375648648593733, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1103.4375, "completions/mean_terminated_length": 1077.0, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.15503100620124025, "frac_reward_zero_std": 0.0, "grad_norm": 3.354352017332648, "kl": 0.0090484619140625, "learning_rate": 9.917884312664395e-07, "loss": -0.0725, "num_tokens": 33523581.0, "reward": 0.0, "reward_std": 0.8834471702575684, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14810737680414726, "rewards/wordcountpos_reward/raw_geo/std": 0.21037850141902315, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572018, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1078.125, "completions/mean_terminated_length": 1078.125, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.15523104620924186, "frac_reward_zero_std": 0.0, "grad_norm": 3.3468395667011417, "kl": 0.0092620849609375, "learning_rate": 9.917285680652805e-07, "loss": -0.044, "num_tokens": 33563463.0, "reward": 0.0, "reward_std": 0.5799727439880371, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.005290466044875371, "rewards/wordcountpos_reward/raw_geo/std": 0.29740636269808074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1341.9375, "completions/mean_terminated_length": 1305.4615478515625, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.15543108621724344, "frac_reward_zero_std": 0.0, "grad_norm": 2.9151895767774505, "kl": 0.00675201416015625, "learning_rate": 9.916684894752659e-07, "loss": -0.0437, "num_tokens": 33609182.0, "reward": 0.0, "reward_std": 0.5609688758850098, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02191452602287353, "rewards/wordcountpos_reward/raw_geo/std": 0.1947108641697368, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1156.875, "completions/mean_terminated_length": 1156.875, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.15563112622524505, "frac_reward_zero_std": 0.0, "grad_norm": 3.1438240134222766, "kl": 0.0071868896484375, "learning_rate": 9.916081955256902e-07, "loss": 0.0289, "num_tokens": 33651060.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8530892133712769, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024303858044810858, "rewards/wordcountpos_reward/raw_geo/std": 0.1037123075816192, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1063.9375, "completions/mean_terminated_length": 1063.9375, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.15583116623324664, "frac_reward_zero_std": 0.0, "grad_norm": 2.9340274692827992, "kl": 0.00630950927734375, "learning_rate": 9.915476862459529e-07, "loss": -0.0184, "num_tokens": 33682843.0, "reward": 0.0, "reward_std": 0.7483388781547546, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03263129962382793, "rewards/wordcountpos_reward/raw_geo/std": 0.09788571460764907, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1115.5, "completions/mean_terminated_length": 1089.86669921875, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.15603120624124825, "frac_reward_zero_std": 0.0, "grad_norm": 3.4798945538773367, "kl": 0.010955810546875, "learning_rate": 9.91486961665559e-07, "loss": -0.02, "num_tokens": 33731571.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9627860188484192, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0038596593208819474, "rewards/wordcountpos_reward/raw_geo/std": 0.09357520253925831, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1160.0625, "completions/mean_terminated_length": 1137.4000244140625, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.15623124624924986, "frac_reward_zero_std": 0.0, "grad_norm": 3.6290723312151876, "kl": 0.00949859619140625, "learning_rate": 9.914260218141179e-07, "loss": -0.0007, "num_tokens": 33778788.0, "reward": 0.0, "reward_std": 0.8243637084960938, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16686743051378558, "rewards/wordcountpos_reward/raw_geo/std": 0.23919704839104772, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 945.25, "completions/mean_terminated_length": 945.25, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.15643128625725145, "frac_reward_zero_std": 0.0, "grad_norm": 3.3978790163975354, "kl": 0.00733184814453125, "learning_rate": 9.913648667213438e-07, "loss": 0.0087, "num_tokens": 33816752.0, "reward": 0.0, "reward_std": 0.7842049598693848, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07163627658839805, "rewards/wordcountpos_reward/raw_geo/std": 0.11252061558288198, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1328.0, "completions/mean_terminated_length": 1303.4285888671875, "completions/min_length": 1136.0, "completions/min_terminated_length": 1136.0, "epoch": 0.15663132626525306, "frac_reward_zero_std": 0.0, "grad_norm": 2.3640950608589484, "kl": 0.005664825439453125, "learning_rate": 9.913034964170567e-07, "loss": -0.006, "num_tokens": 33856304.0, "reward": 0.0, "reward_std": 0.6626273393630981, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1877841178051203, "rewards/wordcountpos_reward/raw_geo/std": 0.10278288994291368, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1054.5, "completions/mean_terminated_length": 1054.5, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.15683136627325464, "frac_reward_zero_std": 0.0, "grad_norm": 3.1832911400260273, "kl": 0.00711822509765625, "learning_rate": 9.912419109311807e-07, "loss": 0.0048, "num_tokens": 33907536.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5279848575592041, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12141121207981156, "rewards/wordcountpos_reward/raw_geo/std": 0.2897728240012861, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1253.875, "completions/mean_terminated_length": 1197.0770263671875, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.15703140628125625, "frac_reward_zero_std": 0.0, "grad_norm": 3.612087941973419, "kl": 0.0083465576171875, "learning_rate": 9.911801102937455e-07, "loss": 0.0043, "num_tokens": 33956414.0, "reward": 0.0, "reward_std": 0.8467312455177307, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06869423680342421, "rewards/wordcountpos_reward/raw_geo/std": 0.07720460893057841, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14801151106386087, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1156.25, "completions/mean_terminated_length": 1156.25, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.15723144628925786, "frac_reward_zero_std": 0.0, "grad_norm": 3.177757514213256, "kl": 0.007659912109375, "learning_rate": 9.91118094534885e-07, "loss": 0.0099, "num_tokens": 34003698.0, "reward": 0.0, "reward_std": 0.4837278127670288, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10621223160088289, "rewards/wordcountpos_reward/raw_geo/std": 0.1536083964388962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.2237723711142063, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1269.875, "completions/mean_terminated_length": 1216.769287109375, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.15743148629725945, "frac_reward_zero_std": 0.0, "grad_norm": 3.094827749582465, "kl": 0.00800323486328125, "learning_rate": 9.910558636848384e-07, "loss": 0.013, "num_tokens": 34050360.0, "reward": 0.0, "reward_std": 0.8311507701873779, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12648163447065644, "rewards/wordcountpos_reward/raw_geo/std": 0.06127426615184167, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 940.625, "completions/mean_terminated_length": 940.625, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.15763152630526106, "frac_reward_zero_std": 0.0, "grad_norm": 2.758995880215236, "kl": 0.007659912109375, "learning_rate": 9.909934177739502e-07, "loss": -0.0651, "num_tokens": 34096010.0, "reward": 0.0, "reward_std": 0.7082724571228027, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14976231314995758, "rewards/wordcountpos_reward/raw_geo/std": 0.29622757573526715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13977495139343474, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1049.75, "completions/mean_terminated_length": 1049.75, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.15783156631326264, "frac_reward_zero_std": 0.0, "grad_norm": 2.2593399404785743, "kl": 0.0043792724609375, "learning_rate": 9.909307568326686e-07, "loss": -0.0171, "num_tokens": 34144862.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5086317658424377, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008440912037138155, "rewards/wordcountpos_reward/raw_geo/std": 0.13763680823236565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1192.875, "completions/mean_terminated_length": 1122.0, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.15803160632126426, "frac_reward_zero_std": 0.0, "grad_norm": 3.4937826097082887, "kl": 0.00675201416015625, "learning_rate": 9.90867880891548e-07, "loss": -0.0118, "num_tokens": 34189388.0, "reward": 7.450580596923828e-09, "reward_std": 1.0656585693359375, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.17107447489817776, "rewards/wordcountpos_reward/raw_geo/std": 0.06637125902080088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 951.9375, "completions/mean_terminated_length": 951.9375, "completions/min_length": 658.0, "completions/min_terminated_length": 658.0, "epoch": 0.15823164632926584, "frac_reward_zero_std": 0.0, "grad_norm": 4.0828284993154025, "kl": 0.0092010498046875, "learning_rate": 9.908047899812468e-07, "loss": -0.0381, "num_tokens": 34229043.0, "reward": 0.0, "reward_std": 0.7647432088851929, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07455052843366836, "rewards/wordcountpos_reward/raw_geo/std": 0.05064521121716925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279464, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1234.0, "completions/mean_terminated_length": 1172.615478515625, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.15843168633726745, "frac_reward_zero_std": 0.0, "grad_norm": 3.272521924112114, "kl": 0.0084381103515625, "learning_rate": 9.907414841325283e-07, "loss": 0.0114, "num_tokens": 34281283.0, "reward": 0.0, "reward_std": 0.701446533203125, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04508345847290812, "rewards/wordcountpos_reward/raw_geo/std": 0.07121332733857914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1161.375, "completions/mean_terminated_length": 1138.800048828125, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.15863172634526906, "frac_reward_zero_std": 0.0, "grad_norm": 2.681399929360685, "kl": 0.0063323974609375, "learning_rate": 9.906779633762606e-07, "loss": -0.0078, "num_tokens": 34324145.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4002396762371063, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03854446077547653, "rewards/wordcountpos_reward/raw_geo/std": 0.08550339471886417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279466, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1162.75, "completions/mean_terminated_length": 1140.2667236328125, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.15883176635327065, "frac_reward_zero_std": 0.0, "grad_norm": 3.15440829909591, "kl": 0.009765625, "learning_rate": 9.906142277434172e-07, "loss": 0.0176, "num_tokens": 34371597.0, "reward": 0.0, "reward_std": 1.009348750114441, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16334101228147407, "rewards/wordcountpos_reward/raw_geo/std": 0.05487435788636801, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1163.25, "completions/mean_terminated_length": 1085.5384521484375, "completions/min_length": 1023.0, "completions/min_terminated_length": 1023.0, "epoch": 0.15903180636127226, "frac_reward_zero_std": 0.0, "grad_norm": 2.9098794809363726, "kl": 0.0064849853515625, "learning_rate": 9.905502772650754e-07, "loss": -0.0252, "num_tokens": 34415513.0, "reward": 0.0, "reward_std": 0.605635941028595, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10950574364291185, "rewards/wordcountpos_reward/raw_geo/std": 0.14417382336820583, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13984117975602023, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1103.0, "completions/max_terminated_length": 1103.0, "completions/mean_length": 908.25, "completions/mean_terminated_length": 908.25, "completions/min_length": 646.0, "completions/min_terminated_length": 646.0, "epoch": 0.15923184636927384, "frac_reward_zero_std": 0.0, "grad_norm": 2.873626986944352, "kl": 0.003871917724609375, "learning_rate": 9.904861119724178e-07, "loss": -0.0088, "num_tokens": 34454709.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6537288427352905, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.039580564833577754, "rewards/wordcountpos_reward/raw_geo/std": 0.035559756890949014, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1197.8125, "completions/mean_terminated_length": 1197.8125, "completions/min_length": 1082.0, "completions/min_terminated_length": 1082.0, "epoch": 0.15943188637727546, "frac_reward_zero_std": 0.0, "grad_norm": 2.7709073663014485, "kl": 0.005016326904296875, "learning_rate": 9.904217318967318e-07, "loss": 0.0153, "num_tokens": 34494546.0, "reward": 2.60770320892334e-08, "reward_std": 0.9712255001068115, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06141137734164746, "rewards/wordcountpos_reward/raw_geo/std": 0.09061907886218735, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1193.0, "completions/max_terminated_length": 1193.0, "completions/mean_length": 998.0, "completions/mean_terminated_length": 998.0, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.15963192638527707, "frac_reward_zero_std": 0.0, "grad_norm": 3.911791898690215, "kl": 0.010162353515625, "learning_rate": 9.903571370694094e-07, "loss": 0.0104, "num_tokens": 34543082.0, "reward": -1.4901161193847656e-08, "reward_std": 1.00223708152771, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06160501681784743, "rewards/wordcountpos_reward/raw_geo/std": 0.06019470116425234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1103.6875, "completions/mean_terminated_length": 1103.6875, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.15983196639327865, "frac_reward_zero_std": 0.0, "grad_norm": 3.3227233177959383, "kl": 0.00799560546875, "learning_rate": 9.902923275219475e-07, "loss": 0.0047, "num_tokens": 34574045.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0319489240646362, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10032981658898234, "rewards/wordcountpos_reward/raw_geo/std": 0.1059968891792867, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0807373427759331, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1061.5, "completions/mean_terminated_length": 1061.5, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.16003200640128026, "frac_reward_zero_std": 0.0, "grad_norm": 3.672975893330212, "kl": 0.007659912109375, "learning_rate": 9.902273032859472e-07, "loss": -0.0089, "num_tokens": 34606405.0, "reward": 0.0, "reward_std": 0.8354414105415344, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04063577222995593, "rewards/wordcountpos_reward/raw_geo/std": 0.0975359924965412, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 968.0, "completions/mean_terminated_length": 968.0, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.16023204640928185, "frac_reward_zero_std": 0.0, "grad_norm": 3.850952848482492, "kl": 0.00897216796875, "learning_rate": 9.90162064393115e-07, "loss": -0.0336, "num_tokens": 34637141.0, "reward": 0.0, "reward_std": 0.9965044856071472, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04524656828288196, "rewards/wordcountpos_reward/raw_geo/std": 0.056714281291219464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1199.4375, "completions/mean_terminated_length": 1062.8182373046875, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.16043208641728346, "frac_reward_zero_std": 0.0, "grad_norm": 3.410119429660572, "kl": 0.00687408447265625, "learning_rate": 9.900966108752614e-07, "loss": 0.0273, "num_tokens": 34687476.0, "reward": 7.450580596923828e-09, "reward_std": 1.0613950490951538, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11234680641347201, "rewards/wordcountpos_reward/raw_geo/std": 0.05078166043023254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 956.375, "completions/mean_terminated_length": 956.375, "completions/min_length": 679.0, "completions/min_terminated_length": 679.0, "epoch": 0.16063212642528504, "frac_reward_zero_std": 0.0, "grad_norm": 3.7621150712402063, "kl": 0.0075225830078125, "learning_rate": 9.900309427643018e-07, "loss": 0.044, "num_tokens": 34727898.0, "reward": 0.0, "reward_std": 0.6777828931808472, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.39093038955561893, "rewards/wordcountpos_reward/raw_geo/std": 0.24487287597197832, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1227.0, "completions/max_terminated_length": 1227.0, "completions/mean_length": 1013.0, "completions/mean_terminated_length": 1013.0, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.16083216643328666, "frac_reward_zero_std": 0.0, "grad_norm": 3.8139140704765246, "kl": 0.010498046875, "learning_rate": 9.899650600922566e-07, "loss": 0.0306, "num_tokens": 34775034.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9415539503097534, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17483805481864553, "rewards/wordcountpos_reward/raw_geo/std": 0.12094116116523004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 981.8125, "completions/mean_terminated_length": 981.8125, "completions/min_length": 646.0, "completions/min_terminated_length": 646.0, "epoch": 0.16103220644128827, "frac_reward_zero_std": 0.0, "grad_norm": 3.816486220351593, "kl": 0.0101776123046875, "learning_rate": 9.8989896289125e-07, "loss": 0.0063, "num_tokens": 34813615.0, "reward": 0.0, "reward_std": 0.8158285617828369, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07149384005877667, "rewards/wordcountpos_reward/raw_geo/std": 0.12996047715949613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1072.125, "completions/mean_terminated_length": 1043.60009765625, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.16123224644928985, "frac_reward_zero_std": 0.0, "grad_norm": 2.7699095221827665, "kl": 0.00536346435546875, "learning_rate": 9.898326511935117e-07, "loss": -0.0025, "num_tokens": 34859065.0, "reward": 0.0, "reward_std": 0.8644427061080933, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014355155240497748, "rewards/wordcountpos_reward/raw_geo/std": 0.11236367552304588, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 996.3125, "completions/mean_terminated_length": 996.3125, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.16143228645729146, "frac_reward_zero_std": 0.0, "grad_norm": 3.452551918966318, "kl": 0.00699615478515625, "learning_rate": 9.897661250313755e-07, "loss": 0.0055, "num_tokens": 34892854.0, "reward": -7.450580596923828e-09, "reward_std": 1.0471452474594116, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03303136901863547, "rewards/wordcountpos_reward/raw_geo/std": 0.032051244998446984, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1140987226857449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1194.1875, "completions/mean_terminated_length": 1055.181884765625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.16163232646529305, "frac_reward_zero_std": 0.0, "grad_norm": 2.608166205892848, "kl": 0.0068511962890625, "learning_rate": 9.896993844372794e-07, "loss": -0.2146, "num_tokens": 34933089.0, "reward": 0.0, "reward_std": 0.9058641195297241, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2515449946723898, "rewards/wordcountpos_reward/raw_geo/std": 0.2506604925653395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.18373692949230228, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1039.0, "completions/mean_terminated_length": 1039.0, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.16183236647329466, "frac_reward_zero_std": 0.0, "grad_norm": 2.6621287210917313, "kl": 0.00592041015625, "learning_rate": 9.896324294437672e-07, "loss": 0.024, "num_tokens": 34975401.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5400734543800354, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06484564826238085, "rewards/wordcountpos_reward/raw_geo/std": 0.21570354738245273, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1092.3125, "completions/mean_terminated_length": 1065.1334228515625, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.16203240648129627, "frac_reward_zero_std": 0.0, "grad_norm": 3.1963898660836096, "kl": 0.00835418701171875, "learning_rate": 9.895652600834859e-07, "loss": -0.0879, "num_tokens": 35009190.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9988131523132324, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09594663405594336, "rewards/wordcountpos_reward/raw_geo/std": 0.05843662382213452, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1225.0625, "completions/mean_terminated_length": 1100.0909423828125, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.16223244648929785, "frac_reward_zero_std": 0.0, "grad_norm": 2.9087438998984143, "kl": 0.00605010986328125, "learning_rate": 9.894978763891879e-07, "loss": -0.0093, "num_tokens": 35060615.0, "reward": 0.0, "reward_std": 0.821182131767273, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015604559454176714, "rewards/wordcountpos_reward/raw_geo/std": 0.057690123591530036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1079.3125, "completions/mean_terminated_length": 1079.3125, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.16243248649729947, "frac_reward_zero_std": 0.0, "grad_norm": 3.5646145112160332, "kl": 0.0086212158203125, "learning_rate": 9.894302783937296e-07, "loss": -0.0528, "num_tokens": 35101980.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0420316457748413, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1819244669661149, "rewards/wordcountpos_reward/raw_geo/std": 0.2371044929453032, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12995725793078622, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1068.0, "completions/max_terminated_length": 1068.0, "completions/mean_length": 889.75, "completions/mean_terminated_length": 889.75, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.16263252650530105, "frac_reward_zero_std": 0.0, "grad_norm": 4.21720415067769, "kl": 0.00738525390625, "learning_rate": 9.89362466130072e-07, "loss": -0.0093, "num_tokens": 35141960.0, "reward": 7.450580596923828e-09, "reward_std": 1.0645757913589478, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07809130347153606, "rewards/wordcountpos_reward/raw_geo/std": 0.06638365163858437, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1044.5, "completions/mean_terminated_length": 979.4285888671875, "completions/min_length": 666.0, "completions/min_terminated_length": 666.0, "epoch": 0.16283256651330266, "frac_reward_zero_std": 0.0, "grad_norm": 2.949445270254486, "kl": 0.006389617919921875, "learning_rate": 9.892944396312812e-07, "loss": -0.0408, "num_tokens": 35180528.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9647680521011353, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07753679244930345, "rewards/wordcountpos_reward/raw_geo/std": 0.13283745496342944, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1095.5625, "completions/mean_terminated_length": 1095.5625, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.16303260652130427, "frac_reward_zero_std": 0.0, "grad_norm": 1.0429789678872363, "kl": 0.0020971298217773438, "learning_rate": 9.892261989305264e-07, "loss": -0.001, "num_tokens": 35228905.0, "reward": 0.0, "reward_std": 0.5095144510269165, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14381371010453364, "rewards/wordcountpos_reward/raw_geo/std": 0.09447399646724984, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15147423690002354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1012.25, "completions/mean_terminated_length": 1012.25, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.16323264652930586, "frac_reward_zero_std": 0.0, "grad_norm": 3.5555673393138703, "kl": 0.0089111328125, "learning_rate": 9.891577440610827e-07, "loss": -0.0515, "num_tokens": 35280949.0, "reward": 0.0, "reward_std": 0.7069839835166931, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14147677355016686, "rewards/wordcountpos_reward/raw_geo/std": 0.14644595010787564, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13601470508735444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1174.9375, "completions/mean_terminated_length": 1128.5, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.16343268653730747, "frac_reward_zero_std": 0.0, "grad_norm": 3.5562781994207597, "kl": 0.00789642333984375, "learning_rate": 9.89089075056329e-07, "loss": -0.0559, "num_tokens": 35325924.0, "reward": 0.0, "reward_std": 0.6456366777420044, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014544088282210446, "rewards/wordcountpos_reward/raw_geo/std": 0.05298646855861031, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1016.4375, "completions/mean_terminated_length": 1016.4375, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.16363272654530905, "frac_reward_zero_std": 0.0, "grad_norm": 3.3597090792094257, "kl": 0.00727081298828125, "learning_rate": 9.890201919497482e-07, "loss": 0.0004, "num_tokens": 35367315.0, "reward": 0.0, "reward_std": 0.8697878122329712, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04491427464205154, "rewards/wordcountpos_reward/raw_geo/std": 0.09087934605437703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1397749513934347, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1111.875, "completions/mean_terminated_length": 1086.0, "completions/min_length": 806.0, "completions/min_terminated_length": 806.0, "epoch": 0.16383276655331067, "frac_reward_zero_std": 0.0, "grad_norm": 2.992608699653316, "kl": 0.006195068359375, "learning_rate": 9.889510947749282e-07, "loss": -0.0087, "num_tokens": 35408993.0, "reward": -1.4901161193847656e-08, "reward_std": 1.021742820739746, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1560479742922557, "rewards/wordcountpos_reward/raw_geo/std": 0.09691535501412255, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282607, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 977.1875, "completions/mean_terminated_length": 977.1875, "completions/min_length": 637.0, "completions/min_terminated_length": 637.0, "epoch": 0.16403280656131225, "frac_reward_zero_std": 0.0, "grad_norm": 3.0216239486390024, "kl": 0.00713348388671875, "learning_rate": 9.888817835655614e-07, "loss": -0.0938, "num_tokens": 35453844.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6564182639122009, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15543244519209015, "rewards/wordcountpos_reward/raw_geo/std": 0.12878481750210258, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14497764834110988, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1061.5, "completions/mean_terminated_length": 1061.5, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.16423284656931386, "frac_reward_zero_std": 0.0, "grad_norm": 2.875493371725326, "kl": 0.00478363037109375, "learning_rate": 9.888122583554438e-07, "loss": 0.0313, "num_tokens": 35492476.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0196912288665771, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015551109633836213, "rewards/wordcountpos_reward/raw_geo/std": 0.10202990547716798, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1133.4375, "completions/mean_terminated_length": 1109.0, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.16443288657731547, "frac_reward_zero_std": 0.0, "grad_norm": 3.647227448835252, "kl": 0.00818634033203125, "learning_rate": 9.887425191784765e-07, "loss": 0.0171, "num_tokens": 35533059.0, "reward": 4.470348358154297e-08, "reward_std": 0.8486392498016357, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01761084672418888, "rewards/wordcountpos_reward/raw_geo/std": 0.07303189988638512, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1176.0, "completions/max_terminated_length": 1176.0, "completions/mean_length": 932.75, "completions/mean_terminated_length": 932.75, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.16463292658531706, "frac_reward_zero_std": 0.0, "grad_norm": 2.9147832188519938, "kl": 0.00701904296875, "learning_rate": 9.886725660686647e-07, "loss": 0.0091, "num_tokens": 35567119.0, "reward": 0.0, "reward_std": 0.5176118612289429, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09158306630554128, "rewards/wordcountpos_reward/raw_geo/std": 0.13005862147907943, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1066.375, "completions/mean_terminated_length": 1066.375, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.16483296659331867, "frac_reward_zero_std": 0.0, "grad_norm": 3.2968097928419806, "kl": 0.009613037109375, "learning_rate": 9.886023990601176e-07, "loss": -0.0033, "num_tokens": 35606357.0, "reward": 0.0, "reward_std": 0.6017299890518188, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2324163070215693, "rewards/wordcountpos_reward/raw_geo/std": 0.058508599641958016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1152.875, "completions/mean_terminated_length": 1129.7333984375, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.16503300660132025, "frac_reward_zero_std": 0.0, "grad_norm": 3.124815431908731, "kl": 0.01092529296875, "learning_rate": 9.88532018187049e-07, "loss": -0.0356, "num_tokens": 35652411.0, "reward": 7.450580596923828e-09, "reward_std": 1.0256050825119019, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.35022350323825163, "rewards/wordcountpos_reward/raw_geo/std": 0.07970165454230675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1238.0625, "completions/mean_terminated_length": 1238.0625, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.16523304660932187, "frac_reward_zero_std": 0.0, "grad_norm": 3.2579567456729834, "kl": 0.015838623046875, "learning_rate": 9.884614234837772e-07, "loss": -0.0185, "num_tokens": 35711548.0, "reward": 0.0, "reward_std": 1.0055289268493652, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14136925022695296, "rewards/wordcountpos_reward/raw_geo/std": 0.13019918711936887, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1215.8125, "completions/mean_terminated_length": 1196.86669921875, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.16543308661732348, "frac_reward_zero_std": 0.0, "grad_norm": 3.242224449434963, "kl": 0.0101318359375, "learning_rate": 9.88390614984724e-07, "loss": 0.0076, "num_tokens": 35761705.0, "reward": 0.0, "reward_std": 0.7001368403434753, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05096290442069456, "rewards/wordcountpos_reward/raw_geo/std": 0.1965121280241637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1122.875, "completions/mean_terminated_length": 1122.875, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.16563312662532506, "frac_reward_zero_std": 0.0, "grad_norm": 3.4414322312128554, "kl": 0.00727081298828125, "learning_rate": 9.883195927244165e-07, "loss": -0.0223, "num_tokens": 35802279.0, "reward": -5.960464477539063e-08, "reward_std": 1.0034435987472534, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1128687333551407, "rewards/wordcountpos_reward/raw_geo/std": 0.12751682723874852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1115.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 970.125, "completions/mean_terminated_length": 970.125, "completions/min_length": 778.0, "completions/min_terminated_length": 778.0, "epoch": 0.16583316663332667, "frac_reward_zero_std": 0.0, "grad_norm": 3.681751323837442, "kl": 0.0074920654296875, "learning_rate": 9.882483567374851e-07, "loss": 0.0099, "num_tokens": 35842025.0, "reward": -1.862645149230957e-08, "reward_std": 1.0243802070617676, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05534024647966182, "rewards/wordcountpos_reward/raw_geo/std": 0.10015460743881506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476839, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1245.0, "completions/max_terminated_length": 1245.0, "completions/mean_length": 956.0, "completions/mean_terminated_length": 956.0, "completions/min_length": 599.0, "completions/min_terminated_length": 599.0, "epoch": 0.16603320664132826, "frac_reward_zero_std": 0.0, "grad_norm": 2.721287053945097, "kl": 0.010284423828125, "learning_rate": 9.881769070586648e-07, "loss": 0.0068, "num_tokens": 35884985.0, "reward": 1.862645149230957e-09, "reward_std": 1.0149438381195068, "rewards/wordcountpos_reward/mean": 1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.023655234470058274, "rewards/wordcountpos_reward/raw_geo/std": 0.09388713262494991, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1159.3125, "completions/mean_terminated_length": 1080.6923828125, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.16623324664932987, "frac_reward_zero_std": 0.0, "grad_norm": 3.0206824384871216, "kl": 0.01027679443359375, "learning_rate": 9.881052437227952e-07, "loss": -0.0319, "num_tokens": 35929430.0, "reward": 0.0, "reward_std": 0.7520310878753662, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06532731103826626, "rewards/wordcountpos_reward/raw_geo/std": 0.06818029730548754, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12765694770084507, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1313.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1090.375, "completions/mean_terminated_length": 1090.375, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.16643328665733145, "frac_reward_zero_std": 0.0, "grad_norm": 3.4711476983162988, "kl": 0.0103912353515625, "learning_rate": 9.88033366764819e-07, "loss": -0.0206, "num_tokens": 35972844.0, "reward": 7.450580596923828e-09, "reward_std": 1.0510834455490112, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03350021858584557, "rewards/wordcountpos_reward/raw_geo/std": 0.16770095998812162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1021.625, "completions/mean_terminated_length": 1021.625, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.16663332666533306, "frac_reward_zero_std": 0.0, "grad_norm": 3.3259332391092387, "kl": 0.00848388671875, "learning_rate": 9.879612762197843e-07, "loss": 0.0148, "num_tokens": 36009006.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8108147978782654, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026350005905452747, "rewards/wordcountpos_reward/raw_geo/std": 0.11636202878194005, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1231.875, "completions/mean_terminated_length": 1214.0001220703125, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.16683336667333468, "frac_reward_zero_std": 0.0, "grad_norm": 3.3645760741066466, "kl": 0.0110931396484375, "learning_rate": 9.878889721228426e-07, "loss": -0.0166, "num_tokens": 36044804.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7991166114807129, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16169766228623994, "rewards/wordcountpos_reward/raw_geo/std": 0.1746051860732521, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575906, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1292.6875, "completions/mean_terminated_length": 1131.4444580078125, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.16703340668133626, "frac_reward_zero_std": 0.0, "grad_norm": 3.153574265973643, "kl": 0.0108642578125, "learning_rate": 9.878164545092496e-07, "loss": -0.0421, "num_tokens": 36099175.0, "reward": 5.960464477539063e-08, "reward_std": 0.2914598882198334, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07879924866288396, "rewards/wordcountpos_reward/raw_geo/std": 0.11702559558664888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1213.625, "completions/mean_terminated_length": 1118.166748046875, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.16723344668933787, "frac_reward_zero_std": 0.0, "grad_norm": 3.1085429149419626, "kl": 0.00798797607421875, "learning_rate": 9.877437234143653e-07, "loss": -0.0734, "num_tokens": 36141065.0, "reward": 0.0, "reward_std": 0.9657445549964905, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.024942923094714117, "rewards/wordcountpos_reward/raw_geo/std": 0.2931455338558853, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1196.0, "completions/mean_terminated_length": 1175.7333984375, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.16743348669733946, "frac_reward_zero_std": 0.0, "grad_norm": 3.2298411346750173, "kl": 0.01171875, "learning_rate": 9.876707788736539e-07, "loss": -0.0261, "num_tokens": 36186697.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5676624178886414, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10441925263890464, "rewards/wordcountpos_reward/raw_geo/std": 0.16120070604233722, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12224747213928167, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1212.75, "completions/mean_terminated_length": 1193.60009765625, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.16763352670534107, "frac_reward_zero_std": 0.0, "grad_norm": 2.9016260329532506, "kl": 0.0107269287109375, "learning_rate": 9.87597620922683e-07, "loss": -0.0279, "num_tokens": 36233013.0, "reward": 0.0, "reward_std": 0.8022392392158508, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025178464281762902, "rewards/wordcountpos_reward/raw_geo/std": 0.12770867486555654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1093.1875, "completions/mean_terminated_length": 1093.1875, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.16783356671334268, "frac_reward_zero_std": 0.0, "grad_norm": 2.675755047730772, "kl": 0.00608062744140625, "learning_rate": 9.875242495971252e-07, "loss": -0.0327, "num_tokens": 36275192.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4860745370388031, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19239438501046435, "rewards/wordcountpos_reward/raw_geo/std": 0.22574746432186216, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1120.6875, "completions/mean_terminated_length": 1120.6875, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.16803360672134426, "frac_reward_zero_std": 0.0, "grad_norm": 3.2960290289495062, "kl": 0.0106201171875, "learning_rate": 9.874506649327567e-07, "loss": -0.0314, "num_tokens": 36318643.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6247957348823547, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07496458962923289, "rewards/wordcountpos_reward/raw_geo/std": 0.07462757331304543, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1066.75, "completions/mean_terminated_length": 1066.75, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.16823364672934588, "frac_reward_zero_std": 0.0, "grad_norm": 3.570090926509135, "kl": 0.0112457275390625, "learning_rate": 9.873768669654575e-07, "loss": -0.0123, "num_tokens": 36356287.0, "reward": -7.450580596923828e-09, "reward_std": 1.0390896797180176, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.01660654291588695, "rewards/wordcountpos_reward/raw_geo/std": 0.1253223571789788, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1586400537905439, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1179.5625, "completions/mean_terminated_length": 1179.5625, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.16843368673734746, "frac_reward_zero_std": 0.0, "grad_norm": 2.8365201872296817, "kl": 0.007781982421875, "learning_rate": 9.873028557312117e-07, "loss": 0.029, "num_tokens": 36402400.0, "reward": 0.0, "reward_std": 0.4980078935623169, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.22886109529388468, "rewards/wordcountpos_reward/raw_geo/std": 0.11052645354135353, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1266.0, "completions/mean_length": 1103.875, "completions/mean_terminated_length": 1077.4666748046875, "completions/min_length": 806.0, "completions/min_terminated_length": 806.0, "epoch": 0.16863372674534907, "frac_reward_zero_std": 0.0, "grad_norm": 3.646939344659468, "kl": 0.0114593505859375, "learning_rate": 9.872286312661077e-07, "loss": -0.0305, "num_tokens": 36451870.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8521960973739624, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05802641712921642, "rewards/wordcountpos_reward/raw_geo/std": 0.11510962727771128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.17299111516469837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1181.0625, "completions/mean_terminated_length": 1181.0625, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.16883376675335068, "frac_reward_zero_std": 0.0, "grad_norm": 2.580314473414636, "kl": 0.0074310302734375, "learning_rate": 9.87154193606338e-07, "loss": -0.0082, "num_tokens": 36489727.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9301730394363403, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06548249343952822, "rewards/wordcountpos_reward/raw_geo/std": 0.2759959113227013, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0807373427759331, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1142.9375, "completions/mean_terminated_length": 980.6364135742188, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.16903380676135227, "frac_reward_zero_std": 0.0, "grad_norm": 3.3568736971519644, "kl": 0.010650634765625, "learning_rate": 9.87079542788198e-07, "loss": -0.0385, "num_tokens": 36543862.0, "reward": 0.0, "reward_std": 0.8167658448219299, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03506218231564674, "rewards/wordcountpos_reward/raw_geo/std": 0.03345129712581062, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923409, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1218.375, "completions/mean_terminated_length": 1178.1429443359375, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.16923384676935388, "frac_reward_zero_std": 0.0, "grad_norm": 3.239560731686645, "kl": 0.0119171142578125, "learning_rate": 9.870046788480884e-07, "loss": 0.0102, "num_tokens": 36595604.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9169098138809204, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08550238941997734, "rewards/wordcountpos_reward/raw_geo/std": 0.13310127448859277, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1342.25, "completions/mean_terminated_length": 1270.5455322265625, "completions/min_length": 1122.0, "completions/min_terminated_length": 1122.0, "epoch": 0.16943388677735546, "frac_reward_zero_std": 0.0, "grad_norm": 2.346578205904879, "kl": 0.005908966064453125, "learning_rate": 9.86929601822513e-07, "loss": -0.0033, "num_tokens": 36639392.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9574987888336182, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.059590097014098446, "rewards/wordcountpos_reward/raw_geo/std": 0.08313618948298293, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1082.3125, "completions/mean_terminated_length": 1082.3125, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.16963392678535708, "frac_reward_zero_std": 0.0, "grad_norm": 2.2516611101547217, "kl": 0.00492095947265625, "learning_rate": 9.868543117480798e-07, "loss": -0.0135, "num_tokens": 36676021.0, "reward": 0.0, "reward_std": 0.9351532459259033, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.022385136957765714, "rewards/wordcountpos_reward/raw_geo/std": 0.06909719484897522, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626406, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1240.0, "completions/mean_terminated_length": 1202.857177734375, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.16983396679335866, "frac_reward_zero_std": 0.0, "grad_norm": 3.4621050117357357, "kl": 0.01080322265625, "learning_rate": 9.867788086615001e-07, "loss": -0.0077, "num_tokens": 36726237.0, "reward": 0.0, "reward_std": 0.6256131529808044, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1307869089018581, "rewards/wordcountpos_reward/raw_geo/std": 0.24786893527117212, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13984117975602023, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1293.0, "completions/max_terminated_length": 1293.0, "completions/mean_length": 969.875, "completions/mean_terminated_length": 969.875, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.17003400680136027, "frac_reward_zero_std": 0.0, "grad_norm": 3.471817323463469, "kl": 0.0121307373046875, "learning_rate": 9.867030925995905e-07, "loss": -0.0152, "num_tokens": 36774363.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8764817714691162, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06636729819860808, "rewards/wordcountpos_reward/raw_geo/std": 0.09451279792465354, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 1035.0, "completions/mean_terminated_length": 1035.0, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.17023404680936188, "frac_reward_zero_std": 0.0, "grad_norm": 3.5072299805195826, "kl": 0.0092315673828125, "learning_rate": 9.866271635992694e-07, "loss": -0.0022, "num_tokens": 36820011.0, "reward": 0.0, "reward_std": 0.9494242072105408, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.035242739353180536, "rewards/wordcountpos_reward/raw_geo/std": 0.05995409400013748, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1171.4375, "completions/mean_terminated_length": 1061.916748046875, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.17043408681736347, "frac_reward_zero_std": 0.0, "grad_norm": 3.22865222888431, "kl": 0.00585174560546875, "learning_rate": 9.86551021697561e-07, "loss": -0.0285, "num_tokens": 36869810.0, "reward": 0.0, "reward_std": 0.9019524455070496, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.029357918366290644, "rewards/wordcountpos_reward/raw_geo/std": 0.07790593362666698, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1233.6875, "completions/mean_terminated_length": 1172.2308349609375, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.17063412682536508, "frac_reward_zero_std": 0.0, "grad_norm": 2.8256145218580735, "kl": 0.00804901123046875, "learning_rate": 9.864746669315918e-07, "loss": -0.0538, "num_tokens": 36917765.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6553990840911865, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1237890383112415, "rewards/wordcountpos_reward/raw_geo/std": 0.07707539071075298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901858, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1152.5, "completions/mean_terminated_length": 1102.857177734375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.17083416683336666, "frac_reward_zero_std": 0.0, "grad_norm": 3.3346319391627945, "kl": 0.009765625, "learning_rate": 9.863980993385931e-07, "loss": -0.1352, "num_tokens": 36959477.0, "reward": 0.0, "reward_std": 0.8577319979667664, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09534393491413318, "rewards/wordcountpos_reward/raw_geo/std": 0.17586537122696005, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.17924739783224086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1266.875, "completions/mean_terminated_length": 1213.0770263671875, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.17103420684136827, "frac_reward_zero_std": 0.0, "grad_norm": 2.7530032365567605, "kl": 0.00656890869140625, "learning_rate": 9.863213189558996e-07, "loss": 0.0237, "num_tokens": 37002299.0, "reward": 0.0, "reward_std": 0.6090317964553833, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12083198358743402, "rewards/wordcountpos_reward/raw_geo/std": 0.12608577630200732, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1081.5625, "completions/mean_terminated_length": 1081.5625, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.1712342468493699, "frac_reward_zero_std": 0.0, "grad_norm": 2.97957555214881, "kl": 0.00861358642578125, "learning_rate": 9.862443258209496e-07, "loss": 0.0027, "num_tokens": 37046996.0, "reward": 0.0, "reward_std": 0.7198086380958557, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0016181737007230192, "rewards/wordcountpos_reward/raw_geo/std": 0.16140078299663677, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457553, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1014.6875, "completions/mean_terminated_length": 1014.6875, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.17143428685737147, "frac_reward_zero_std": 0.0, "grad_norm": 3.5161266672864495, "kl": 0.0102996826171875, "learning_rate": 9.861671199712855e-07, "loss": -0.0269, "num_tokens": 37090447.0, "reward": 0.0, "reward_std": 0.809076189994812, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01242068897396777, "rewards/wordcountpos_reward/raw_geo/std": 0.0716704015239749, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1280.5625, "completions/mean_terminated_length": 1207.416748046875, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.17163432686537308, "frac_reward_zero_std": 0.0, "grad_norm": 2.7494771077513507, "kl": 0.007396697998046875, "learning_rate": 9.86089701444553e-07, "loss": -0.0157, "num_tokens": 37133632.0, "reward": 0.0, "reward_std": 0.5005857348442078, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.044719628188765674, "rewards/wordcountpos_reward/raw_geo/std": 0.06758191509380684, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.725, "rewards/wordcountpos_reward/raw_rule/std": 0.2345997379304526, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1036.625, "completions/mean_terminated_length": 1036.625, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.17183436687337467, "frac_reward_zero_std": 0.0, "grad_norm": 3.501125289802964, "kl": 0.00848388671875, "learning_rate": 9.86012070278502e-07, "loss": -0.0239, "num_tokens": 37178698.0, "reward": 2.9802322387695312e-08, "reward_std": 0.36654722690582275, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21775856565220852, "rewards/wordcountpos_reward/raw_geo/std": 0.23885335881978279, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.18993176162525865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1163.375, "completions/mean_terminated_length": 1163.375, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.17203440688137628, "frac_reward_zero_std": 0.0, "grad_norm": 3.1902520958967617, "kl": 0.00782012939453125, "learning_rate": 9.859342265109856e-07, "loss": 0.0046, "num_tokens": 37227416.0, "reward": 0.0, "reward_std": 0.9346041679382324, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.054860423844448405, "rewards/wordcountpos_reward/raw_geo/std": 0.29577180819468957, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12171612389003693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1275.0625, "completions/mean_terminated_length": 1140.0999755859375, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.17223444688937786, "frac_reward_zero_std": 0.0, "grad_norm": 3.6784830776347937, "kl": 0.0117340087890625, "learning_rate": 9.858561701799606e-07, "loss": -0.0267, "num_tokens": 37282897.0, "reward": -2.2351741790771484e-08, "reward_std": 1.056344747543335, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0330270012555121, "rewards/wordcountpos_reward/raw_geo/std": 0.10900429159144488, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1164.75, "completions/mean_terminated_length": 1116.857177734375, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.17243448689737947, "frac_reward_zero_std": 0.0, "grad_norm": 3.3338210402426216, "kl": 0.00772857666015625, "learning_rate": 9.85777901323488e-07, "loss": 0.0154, "num_tokens": 37326093.0, "reward": 0.0, "reward_std": 0.8620393872261047, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11497980763880261, "rewards/wordcountpos_reward/raw_geo/std": 0.2557735068149251, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14446581038560777, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 997.3125, "completions/mean_terminated_length": 963.800048828125, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.17263452690538109, "frac_reward_zero_std": 0.0, "grad_norm": 2.9005813806817704, "kl": 0.00928497314453125, "learning_rate": 9.856994199797317e-07, "loss": -0.0495, "num_tokens": 37366794.0, "reward": 0.0, "reward_std": 1.0504605770111084, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05551411403029644, "rewards/wordcountpos_reward/raw_geo/std": 0.07440455569604205, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14298407059684815, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1168.75, "completions/mean_terminated_length": 1168.75, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.17283456691338267, "frac_reward_zero_std": 0.0, "grad_norm": 2.8040178548787384, "kl": 0.00560760498046875, "learning_rate": 9.8562072618696e-07, "loss": 0.0257, "num_tokens": 37400982.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0157716274261475, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.044969030900224624, "rewards/wordcountpos_reward/raw_geo/std": 0.08188372432840825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1213.5625, "completions/mean_terminated_length": 1194.4666748046875, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.17303460692138428, "frac_reward_zero_std": 0.0, "grad_norm": 3.431958043182085, "kl": 0.00848388671875, "learning_rate": 9.85541819983544e-07, "loss": 0.0076, "num_tokens": 37443231.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0461863279342651, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19003258497671213, "rewards/wordcountpos_reward/raw_geo/std": 0.26364405035333305, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1374.75, "completions/mean_terminated_length": 1345.84619140625, "completions/min_length": 1199.0, "completions/min_terminated_length": 1199.0, "epoch": 0.17323464692938587, "frac_reward_zero_std": 0.0, "grad_norm": 2.3402116113958384, "kl": 0.003650665283203125, "learning_rate": 9.854627014079588e-07, "loss": 0.0031, "num_tokens": 37488171.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8610503673553467, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04421237593881027, "rewards/wordcountpos_reward/raw_geo/std": 0.2047881506706108, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327549, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1103.0625, "completions/mean_terminated_length": 1103.0625, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.17343468693738748, "frac_reward_zero_std": 0.0, "grad_norm": 3.558773114190285, "kl": 0.0107879638671875, "learning_rate": 9.853833704987831e-07, "loss": -0.003, "num_tokens": 37539396.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0214763879776, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2440186421896916, "rewards/wordcountpos_reward/raw_geo/std": 0.2170722808575906, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1070.0, "completions/max_terminated_length": 1070.0, "completions/mean_length": 951.125, "completions/mean_terminated_length": 951.125, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.1736347269453891, "frac_reward_zero_std": 0.0, "grad_norm": 3.139169286310432, "kl": 0.004642486572265625, "learning_rate": 9.85303827294699e-07, "loss": -0.0059, "num_tokens": 37576446.0, "reward": 0.0, "reward_std": 1.0224297046661377, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12126069886016867, "rewards/wordcountpos_reward/raw_geo/std": 0.08884290870369663, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087683, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1229.5625, "completions/mean_terminated_length": 1229.5625, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.17383476695339067, "frac_reward_zero_std": 0.0, "grad_norm": 3.2712650974992714, "kl": 0.009857177734375, "learning_rate": 9.852240718344919e-07, "loss": -0.0169, "num_tokens": 37625575.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6319416165351868, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.20580616187448658, "rewards/wordcountpos_reward/raw_geo/std": 0.3384619816635388, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1243.1875, "completions/mean_terminated_length": 1183.923095703125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.17403480696139229, "frac_reward_zero_std": 0.0, "grad_norm": 2.986178046304281, "kl": 0.00699615478515625, "learning_rate": 9.85144104157051e-07, "loss": -0.0229, "num_tokens": 37680562.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8674370050430298, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007906180059571218, "rewards/wordcountpos_reward/raw_geo/std": 0.05594172712157067, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1166.1875, "completions/mean_terminated_length": 1089.1539306640625, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.17423484696939387, "frac_reward_zero_std": 0.0, "grad_norm": 2.794130420549919, "kl": 0.006622314453125, "learning_rate": 9.85063924301369e-07, "loss": -0.0089, "num_tokens": 37722685.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9355672597885132, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007497083548027235, "rewards/wordcountpos_reward/raw_geo/std": 0.15743106226327738, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 993.625, "completions/mean_terminated_length": 993.625, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.17443488697739548, "frac_reward_zero_std": 0.0, "grad_norm": 2.4544196302680175, "kl": 0.00536346435546875, "learning_rate": 9.84983532306542e-07, "loss": 0.0103, "num_tokens": 37755543.0, "reward": 0.0, "reward_std": 1.0122637748718262, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0657117590214513, "rewards/wordcountpos_reward/raw_geo/std": 0.04112101067461087, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1274.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 1095.6875, "completions/mean_terminated_length": 1095.6875, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.1746349269853971, "frac_reward_zero_std": 0.0, "grad_norm": 3.2791711267773946, "kl": 0.008941650390625, "learning_rate": 9.849029282117692e-07, "loss": -0.0442, "num_tokens": 37798506.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7395102977752686, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13161597270699313, "rewards/wordcountpos_reward/raw_geo/std": 0.26481364284519454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1212.0625, "completions/mean_terminated_length": 1212.0625, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.17483496699339868, "frac_reward_zero_std": 0.0, "grad_norm": 3.2264562739264093, "kl": 0.007110595703125, "learning_rate": 9.84822112056354e-07, "loss": 0.0071, "num_tokens": 37842683.0, "reward": -2.9802322387695312e-08, "reward_std": 0.783845841884613, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11506658318714942, "rewards/wordcountpos_reward/raw_geo/std": 0.12302823309476989, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14900907255500823, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1228.75, "completions/mean_terminated_length": 1228.75, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.1750350070014003, "frac_reward_zero_std": 0.0, "grad_norm": 3.4114230524533706, "kl": 0.0085906982421875, "learning_rate": 9.847410838797023e-07, "loss": -0.0772, "num_tokens": 37885023.0, "reward": 1.6763806343078613e-08, "reward_std": 1.0192160606384277, "rewards/wordcountpos_reward/mean": 1.6763806343078613e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15263395326928883, "rewards/wordcountpos_reward/raw_geo/std": 0.08672447917645187, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337807, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1290.5625, "completions/mean_terminated_length": 1276.60009765625, "completions/min_length": 1111.0, "completions/min_terminated_length": 1111.0, "epoch": 0.17523504700940187, "frac_reward_zero_std": 0.0, "grad_norm": 2.5893669634215977, "kl": 0.00649261474609375, "learning_rate": 9.846598437213241e-07, "loss": -0.0134, "num_tokens": 37932384.0, "reward": 0.0, "reward_std": 0.5021741986274719, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0873640607521561, "rewards/wordcountpos_reward/raw_geo/std": 0.2444931068678306, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1293.5, "completions/mean_terminated_length": 1245.84619140625, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.17543508701740348, "frac_reward_zero_std": 0.0, "grad_norm": 2.9555129026048723, "kl": 0.00737762451171875, "learning_rate": 9.845783916208325e-07, "loss": 0.0133, "num_tokens": 37973256.0, "reward": 0.0, "reward_std": 0.9664082527160645, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08951282854205395, "rewards/wordcountpos_reward/raw_geo/std": 0.07160204705473998, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1129.5, "completions/mean_terminated_length": 1129.5, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.17563512702540507, "frac_reward_zero_std": 0.0, "grad_norm": 3.5319703710550185, "kl": 0.0093994140625, "learning_rate": 9.844967276179435e-07, "loss": -0.0171, "num_tokens": 38024640.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0311518907546997, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12415629319911645, "rewards/wordcountpos_reward/raw_geo/std": 0.10857688943831809, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1052.5, "completions/mean_terminated_length": 1022.666748046875, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.17583516703340668, "frac_reward_zero_std": 0.0, "grad_norm": 3.1798805596283968, "kl": 0.00890350341796875, "learning_rate": 9.844148517524772e-07, "loss": -0.0122, "num_tokens": 38062472.0, "reward": 1.4901161193847656e-08, "reward_std": 0.994231104850769, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19620229254848678, "rewards/wordcountpos_reward/raw_geo/std": 0.08364270125635723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1186.8125, "completions/mean_terminated_length": 1165.933349609375, "completions/min_length": 1056.0, "completions/min_terminated_length": 1056.0, "epoch": 0.1760352070414083, "frac_reward_zero_std": 0.0, "grad_norm": 2.613293452292534, "kl": 0.007110595703125, "learning_rate": 9.843327640643566e-07, "loss": 0.03, "num_tokens": 38100653.0, "reward": 0.0, "reward_std": 0.6162838935852051, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03411647417153821, "rewards/wordcountpos_reward/raw_geo/std": 0.113682835186528, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1260.0625, "completions/mean_terminated_length": 1225.7857666015625, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.17623524704940988, "frac_reward_zero_std": 0.0, "grad_norm": 2.8256290691578374, "kl": 0.00856781005859375, "learning_rate": 9.842504645936078e-07, "loss": -0.0471, "num_tokens": 38153846.0, "reward": 0.0, "reward_std": 0.6246308088302612, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.012818517251148596, "rewards/wordcountpos_reward/raw_geo/std": 0.269361004862285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1248.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 1076.375, "completions/mean_terminated_length": 1076.375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.1764352870574115, "frac_reward_zero_std": 0.0, "grad_norm": 3.4294388687230777, "kl": 0.00981903076171875, "learning_rate": 9.84167953380361e-07, "loss": -0.0163, "num_tokens": 38195596.0, "reward": -7.450580596923828e-09, "reward_std": 1.0519238710403442, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.09750724834092946, "rewards/wordcountpos_reward/raw_geo/std": 0.045521534897174275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1173.375, "completions/mean_terminated_length": 1126.71435546875, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.17663532706541307, "frac_reward_zero_std": 0.0, "grad_norm": 3.324748927064528, "kl": 0.00524139404296875, "learning_rate": 9.840852304648481e-07, "loss": -0.0903, "num_tokens": 38244490.0, "reward": 3.725290298461914e-09, "reward_std": 1.0523391962051392, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.029037714097839465, "rewards/wordcountpos_reward/raw_geo/std": 0.19357625351024577, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125754, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1146.9375, "completions/mean_terminated_length": 1146.9375, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.17683536707341468, "frac_reward_zero_std": 0.0, "grad_norm": 2.321176089301189, "kl": 0.00499725341796875, "learning_rate": 9.84002295887406e-07, "loss": -0.0137, "num_tokens": 38295665.0, "reward": 0.0, "reward_std": 0.9161946773529053, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06034120247129848, "rewards/wordcountpos_reward/raw_geo/std": 0.08911955422612272, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1344.0625, "completions/mean_terminated_length": 1250.5, "completions/min_length": 1115.0, "completions/min_terminated_length": 1115.0, "epoch": 0.1770354070814163, "frac_reward_zero_std": 0.0, "grad_norm": 2.62096170888923, "kl": 0.008819580078125, "learning_rate": 9.839191496884736e-07, "loss": -0.0153, "num_tokens": 38351202.0, "reward": 0.0, "reward_std": 0.6652466058731079, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0313640343049726, "rewards/wordcountpos_reward/raw_geo/std": 0.09490847987615997, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1224.4375, "completions/mean_terminated_length": 1160.84619140625, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.17723544708941788, "frac_reward_zero_std": 0.0, "grad_norm": 2.1787585672285625, "kl": 0.00435638427734375, "learning_rate": 9.838357919085933e-07, "loss": 0.0222, "num_tokens": 38397889.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0030863285064697, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1977029944399264, "rewards/wordcountpos_reward/raw_geo/std": 0.14663613845897422, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1193.5625, "completions/mean_terminated_length": 1149.7857666015625, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.1774354870974195, "frac_reward_zero_std": 0.0, "grad_norm": 2.1622229550918166, "kl": 0.0045013427734375, "learning_rate": 9.83752222588411e-07, "loss": 0.0205, "num_tokens": 38451378.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8524837493896484, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06193691803381681, "rewards/wordcountpos_reward/raw_geo/std": 0.06287351706635681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1137.75, "completions/mean_terminated_length": 1137.75, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.17763552710542108, "frac_reward_zero_std": 0.0, "grad_norm": 3.5089046382192826, "kl": 0.008880615234375, "learning_rate": 9.836684417686754e-07, "loss": 0.0022, "num_tokens": 38504710.0, "reward": 3.725290298461914e-09, "reward_std": 1.0668672323226929, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.198020197803301, "rewards/wordcountpos_reward/raw_geo/std": 0.1000581027422953, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042253, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 1086.8125, "completions/mean_terminated_length": 1086.8125, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.1778355671134227, "frac_reward_zero_std": 0.0, "grad_norm": 2.9316485665738896, "kl": 0.00653076171875, "learning_rate": 9.835844494902381e-07, "loss": -0.0053, "num_tokens": 38547659.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9732360243797302, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13120952575304273, "rewards/wordcountpos_reward/raw_geo/std": 0.15881552924067985, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1176.75, "completions/mean_terminated_length": 1102.1539306640625, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.17803560712142427, "frac_reward_zero_std": 0.0, "grad_norm": 2.4367797913602853, "kl": 0.00501251220703125, "learning_rate": 9.835002457940543e-07, "loss": 0.0488, "num_tokens": 38598455.0, "reward": 0.0, "reward_std": 0.9611717462539673, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.047756964343114404, "rewards/wordcountpos_reward/raw_geo/std": 0.18119012155036263, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1019.75, "completions/mean_terminated_length": 987.7333984375, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.17823564712942588, "frac_reward_zero_std": 0.0, "grad_norm": 3.8807606846051383, "kl": 0.00872039794921875, "learning_rate": 9.834158307211825e-07, "loss": 0.0952, "num_tokens": 38628491.0, "reward": 0.0, "reward_std": 0.8411787748336792, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1457158373757128, "rewards/wordcountpos_reward/raw_geo/std": 0.1014224524068365, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1326.625, "completions/mean_terminated_length": 1286.615478515625, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.1784356871374275, "frac_reward_zero_std": 0.0, "grad_norm": 3.0549532092651024, "kl": 0.00742340087890625, "learning_rate": 9.833312043127835e-07, "loss": 0.0129, "num_tokens": 38667797.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5767874121665955, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21452767587548643, "rewards/wordcountpos_reward/raw_geo/std": 0.23639937938187505, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 945.875, "completions/mean_terminated_length": 945.875, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.17863572714542908, "frac_reward_zero_std": 0.0, "grad_norm": 3.631812723072941, "kl": 0.00806427001953125, "learning_rate": 9.832463666101215e-07, "loss": 0.0289, "num_tokens": 38711539.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0570076704025269, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016045338430651354, "rewards/wordcountpos_reward/raw_geo/std": 0.0602284217619523, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1022.8125, "completions/mean_terminated_length": 1022.8125, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.1788357671534307, "frac_reward_zero_std": 0.0, "grad_norm": 2.80153128658189, "kl": 0.00571441650390625, "learning_rate": 9.831613176545637e-07, "loss": 0.0138, "num_tokens": 38747176.0, "reward": 0.0, "reward_std": 0.8905612230300903, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04089018645959631, "rewards/wordcountpos_reward/raw_geo/std": 0.04129331106827069, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1156.0, "completions/mean_terminated_length": 1156.0, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.17903580716143228, "frac_reward_zero_std": 0.0, "grad_norm": 2.3144176664324854, "kl": 0.005451202392578125, "learning_rate": 9.830760574875806e-07, "loss": 0.0055, "num_tokens": 38786768.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6741729974746704, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08762354669870973, "rewards/wordcountpos_reward/raw_geo/std": 0.052466024193268776, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1266.8125, "completions/mean_terminated_length": 1251.2667236328125, "completions/min_length": 1039.0, "completions/min_terminated_length": 1039.0, "epoch": 0.1792358471694339, "frac_reward_zero_std": 0.0, "grad_norm": 2.522072825111293, "kl": 0.0058135986328125, "learning_rate": 9.829905861507453e-07, "loss": 0.0039, "num_tokens": 38833005.0, "reward": 5.960464477539063e-08, "reward_std": 0.7966471910476685, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07486677155101398, "rewards/wordcountpos_reward/raw_geo/std": 0.10715636968913805, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1266.8125, "completions/mean_terminated_length": 1213.0, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.1794358871774355, "frac_reward_zero_std": 0.0, "grad_norm": 2.8231646880918118, "kl": 0.00652313232421875, "learning_rate": 9.829049036857338e-07, "loss": -0.0262, "num_tokens": 38884874.0, "reward": 0.0, "reward_std": 0.9405481815338135, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.032435498550571734, "rewards/wordcountpos_reward/raw_geo/std": 0.14189761258588804, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1118.5625, "completions/mean_terminated_length": 1118.5625, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.17963592718543708, "frac_reward_zero_std": 0.0, "grad_norm": 2.841819918917027, "kl": 0.00865936279296875, "learning_rate": 9.82819010134326e-07, "loss": 0.0175, "num_tokens": 38930763.0, "reward": 0.0, "reward_std": 0.8674912452697754, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.179958831942811, "rewards/wordcountpos_reward/raw_geo/std": 0.1269351332976292, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1109.0, "completions/max_terminated_length": 1109.0, "completions/mean_length": 910.1875, "completions/mean_terminated_length": 910.1875, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.1798359671934387, "frac_reward_zero_std": 0.0, "grad_norm": 4.234469351243712, "kl": 0.0094757080078125, "learning_rate": 9.827329055384031e-07, "loss": 0.0094, "num_tokens": 38970670.0, "reward": 2.2351741790771484e-08, "reward_std": 0.9956492185592651, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012990841045994963, "rewards/wordcountpos_reward/raw_geo/std": 0.0971725266844798, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1197.0625, "completions/mean_terminated_length": 1015.2999877929688, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.18003600720144028, "frac_reward_zero_std": 0.0, "grad_norm": 2.8201012187429466, "kl": 0.0067901611328125, "learning_rate": 9.826465899399504e-07, "loss": -0.0114, "num_tokens": 39022455.0, "reward": 0.0, "reward_std": 0.6320232152938843, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16034238006822255, "rewards/wordcountpos_reward/raw_geo/std": 0.100569567402025, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1270.375, "completions/mean_terminated_length": 1255.0667724609375, "completions/min_length": 1121.0, "completions/min_terminated_length": 1121.0, "epoch": 0.1802360472094419, "frac_reward_zero_std": 0.0, "grad_norm": 3.213694438490083, "kl": 0.0090179443359375, "learning_rate": 9.82560063381056e-07, "loss": 0.0035, "num_tokens": 39069997.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9283663034439087, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13790807469382152, "rewards/wordcountpos_reward/raw_geo/std": 0.15510724745403243, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1206.5, "completions/mean_terminated_length": 1206.5, "completions/min_length": 1050.0, "completions/min_terminated_length": 1050.0, "epoch": 0.18043608721744347, "frac_reward_zero_std": 0.0, "grad_norm": 2.8537674851302355, "kl": 0.00579833984375, "learning_rate": 9.824733259039104e-07, "loss": -0.0028, "num_tokens": 39111717.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9624330997467041, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18702226892966964, "rewards/wordcountpos_reward/raw_geo/std": 0.14591069436031148, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1246.6875, "completions/mean_terminated_length": 1210.5, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.1806361272254451, "frac_reward_zero_std": 0.0, "grad_norm": 2.813198369721467, "kl": 0.00720977783203125, "learning_rate": 9.823863775508072e-07, "loss": -0.0231, "num_tokens": 39159000.0, "reward": -2.9802322387695312e-08, "reward_std": 1.008222222328186, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04658171978607431, "rewards/wordcountpos_reward/raw_geo/std": 0.06365656350168548, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14851112939963645, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1140.125, "completions/mean_terminated_length": 1140.125, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.1808361672334467, "frac_reward_zero_std": 0.0, "grad_norm": 3.093059159319179, "kl": 0.00778961181640625, "learning_rate": 9.822992183641429e-07, "loss": -0.0097, "num_tokens": 39210730.0, "reward": 5.960464477539063e-08, "reward_std": 0.7320858240127563, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.046864234907353926, "rewards/wordcountpos_reward/raw_geo/std": 0.15270220424044126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1400.0, "completions/mean_terminated_length": 1322.2222900390625, "completions/min_length": 1079.0, "completions/min_terminated_length": 1079.0, "epoch": 0.18103620724144828, "frac_reward_zero_std": 0.0, "grad_norm": 2.809287292883197, "kl": 0.0068206787109375, "learning_rate": 9.822118483864167e-07, "loss": 0.0086, "num_tokens": 39260570.0, "reward": 0.0, "reward_std": 0.9606954455375671, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10997422892097904, "rewards/wordcountpos_reward/raw_geo/std": 0.1741691170159194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 846.9375, "completions/mean_terminated_length": 846.9375, "completions/min_length": 508.0, "completions/min_terminated_length": 508.0, "epoch": 0.1812362472494499, "frac_reward_zero_std": 0.0, "grad_norm": 3.5644179744507105, "kl": 0.00801849365234375, "learning_rate": 9.821242676602308e-07, "loss": -0.0214, "num_tokens": 39286105.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7069605588912964, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.047907115663022824, "rewards/wordcountpos_reward/raw_geo/std": 0.0681875910477015, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1231.0625, "completions/mean_terminated_length": 1169.0, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.18143628725745148, "frac_reward_zero_std": 0.0, "grad_norm": 3.3476209440387974, "kl": 0.0085601806640625, "learning_rate": 9.820364762282896e-07, "loss": 0.0176, "num_tokens": 39331026.0, "reward": 0.0, "reward_std": 0.7648845911026001, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.247400421269785, "rewards/wordcountpos_reward/raw_geo/std": 0.3115951254045245, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1162.4375, "completions/mean_terminated_length": 1049.916748046875, "completions/min_length": 673.0, "completions/min_terminated_length": 673.0, "epoch": 0.1816363272654531, "frac_reward_zero_std": 0.0, "grad_norm": 3.031334611482978, "kl": 0.00675201416015625, "learning_rate": 9.819484741334009e-07, "loss": 0.0396, "num_tokens": 39366257.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0533500909805298, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13639401083969963, "rewards/wordcountpos_reward/raw_geo/std": 0.24310983160467106, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1082.9375, "completions/mean_terminated_length": 1082.9375, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.1818363672734547, "frac_reward_zero_std": 0.0, "grad_norm": 2.915696897818177, "kl": 0.007659912109375, "learning_rate": 9.818602614184745e-07, "loss": -0.0303, "num_tokens": 39409560.0, "reward": 0.0, "reward_std": 0.981995701789856, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.057409482703430145, "rewards/wordcountpos_reward/raw_geo/std": 0.05842319473447109, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1054.625, "completions/mean_terminated_length": 1024.933349609375, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.1820364072814563, "frac_reward_zero_std": 0.0, "grad_norm": 3.095718240065994, "kl": 0.00711822509765625, "learning_rate": 9.817718381265238e-07, "loss": 0.0101, "num_tokens": 39451130.0, "reward": 0.0, "reward_std": 0.645683765411377, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.059412785608560406, "rewards/wordcountpos_reward/raw_geo/std": 0.04698696168490984, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11674600476945511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1209.3125, "completions/mean_terminated_length": 1189.933349609375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.1822364472894579, "frac_reward_zero_std": 0.0, "grad_norm": 2.9231185689219443, "kl": 0.00673675537109375, "learning_rate": 9.81683204300664e-07, "loss": -0.058, "num_tokens": 39493047.0, "reward": 7.450580596923828e-09, "reward_std": 1.0139415264129639, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07672315141229276, "rewards/wordcountpos_reward/raw_geo/std": 0.05076262441087369, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 990.6875, "completions/mean_terminated_length": 990.6875, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.18243648729745948, "frac_reward_zero_std": 0.0, "grad_norm": 2.92171306604703, "kl": 0.005954742431640625, "learning_rate": 9.815943599841138e-07, "loss": -0.006, "num_tokens": 39522746.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9515438079833984, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013930748266304718, "rewards/wordcountpos_reward/raw_geo/std": 0.0485237743724745, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 907.75, "completions/mean_terminated_length": 907.75, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.1826365273054611, "frac_reward_zero_std": 0.0, "grad_norm": 4.039595263979884, "kl": 0.0097808837890625, "learning_rate": 9.815053052201938e-07, "loss": -0.0177, "num_tokens": 39561846.0, "reward": 5.960464477539063e-08, "reward_std": 0.8298944234848022, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1072343145369315, "rewards/wordcountpos_reward/raw_geo/std": 0.0901326456376238, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1093.4375, "completions/mean_terminated_length": 1066.3333740234375, "completions/min_length": 575.0, "completions/min_terminated_length": 575.0, "epoch": 0.1828365673134627, "frac_reward_zero_std": 0.0, "grad_norm": 2.998229941507805, "kl": 0.00606536865234375, "learning_rate": 9.814160400523274e-07, "loss": -0.0318, "num_tokens": 39611597.0, "reward": 0.0, "reward_std": 0.9494115114212036, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.00804665575599557, "rewards/wordcountpos_reward/raw_geo/std": 0.01842191930143263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.17191729277636836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1000.1875, "completions/mean_terminated_length": 1000.1875, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.1830366073214643, "frac_reward_zero_std": 0.0, "grad_norm": 3.5863036384839733, "kl": 0.008045196533203125, "learning_rate": 9.81326564524041e-07, "loss": -0.0436, "num_tokens": 39662624.0, "reward": 0.0, "reward_std": 0.7314053177833557, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05659158533015089, "rewards/wordcountpos_reward/raw_geo/std": 0.07311513878055974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087683, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1150.0, "completions/max_terminated_length": 1150.0, "completions/mean_length": 994.6875, "completions/mean_terminated_length": 994.6875, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.1832366473294659, "frac_reward_zero_std": 0.0, "grad_norm": 2.4923078467428557, "kl": 0.0088653564453125, "learning_rate": 9.81236878678963e-07, "loss": 0.0181, "num_tokens": 39695147.0, "reward": 0.0, "reward_std": 0.5605810284614563, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03669888385187112, "rewards/wordcountpos_reward/raw_geo/std": 0.09213379448036717, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1300.375, "completions/mean_terminated_length": 1254.3077392578125, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.18343668733746749, "frac_reward_zero_std": 0.0, "grad_norm": 3.0625207878777223, "kl": 0.00675201416015625, "learning_rate": 9.81146982560825e-07, "loss": -0.0184, "num_tokens": 39739689.0, "reward": -2.9802322387695312e-08, "reward_std": 0.544572114944458, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04276772020932102, "rewards/wordcountpos_reward/raw_geo/std": 0.0779179918129157, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1237.125, "completions/mean_terminated_length": 1199.571533203125, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.1836367273454691, "frac_reward_zero_std": 0.0, "grad_norm": 2.7928034829206565, "kl": 0.005916595458984375, "learning_rate": 9.810568762134602e-07, "loss": -0.0001, "num_tokens": 39787571.0, "reward": 0.0, "reward_std": 0.6140346527099609, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06258728712073611, "rewards/wordcountpos_reward/raw_geo/std": 0.14988995833111593, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14446581038560774, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1287.5, "completions/mean_terminated_length": 1273.3333740234375, "completions/min_length": 1104.0, "completions/min_terminated_length": 1104.0, "epoch": 0.18383676735347068, "frac_reward_zero_std": 0.0, "grad_norm": 2.508562556398617, "kl": 0.0067596435546875, "learning_rate": 9.809665596808052e-07, "loss": 0.0104, "num_tokens": 39835755.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9353573322296143, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05361187372565858, "rewards/wordcountpos_reward/raw_geo/std": 0.2733418558052254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1126.25, "completions/mean_terminated_length": 1126.25, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.1840368073614723, "frac_reward_zero_std": 0.0, "grad_norm": 3.4605016606311287, "kl": 0.0082244873046875, "learning_rate": 9.808760330068989e-07, "loss": -0.0289, "num_tokens": 39878959.0, "reward": 0.0, "reward_std": 0.9318041801452637, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10305904704367594, "rewards/wordcountpos_reward/raw_geo/std": 0.08061109890416099, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1268.25, "completions/mean_terminated_length": 1268.25, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.1842368473694739, "frac_reward_zero_std": 0.0, "grad_norm": 2.148218969910704, "kl": 0.0030364990234375, "learning_rate": 9.807852962358822e-07, "loss": 0.004, "num_tokens": 39916979.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8309612274169922, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.043274024465947955, "rewards/wordcountpos_reward/raw_geo/std": 0.27643031694670195, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1034.5625, "completions/mean_terminated_length": 1034.5625, "completions/min_length": 668.0, "completions/min_terminated_length": 668.0, "epoch": 0.1844368873774755, "frac_reward_zero_std": 0.0, "grad_norm": 3.6725786354322207, "kl": 0.0082855224609375, "learning_rate": 9.806943494119989e-07, "loss": -0.0338, "num_tokens": 39951452.0, "reward": 4.470348358154297e-08, "reward_std": 0.9785497188568115, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08835530952891071, "rewards/wordcountpos_reward/raw_geo/std": 0.043336109940599006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 990.3125, "completions/mean_terminated_length": 956.3333740234375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.1846369273854771, "frac_reward_zero_std": 0.0, "grad_norm": 2.6022400172543607, "kl": 0.00708770751953125, "learning_rate": 9.806031925795951e-07, "loss": -0.2141, "num_tokens": 40004249.0, "reward": -1.862645149230957e-08, "reward_std": 0.8384362459182739, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08457805336744288, "rewards/wordcountpos_reward/raw_geo/std": 0.2404769504864457, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14168300559373406, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1238.75, "completions/mean_terminated_length": 1221.3333740234375, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.18483696739347868, "frac_reward_zero_std": 0.0, "grad_norm": 3.205288225011804, "kl": 0.00838470458984375, "learning_rate": 9.805118257831192e-07, "loss": -0.0194, "num_tokens": 40060021.0, "reward": 0.0, "reward_std": 1.054551124572754, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06803175917655288, "rewards/wordcountpos_reward/raw_geo/std": 0.22152616310655693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1296.3125, "completions/mean_terminated_length": 1267.21435546875, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.1850370074014803, "frac_reward_zero_std": 0.0, "grad_norm": 2.818805730571551, "kl": 0.00707244873046875, "learning_rate": 9.804202490671223e-07, "loss": -0.0544, "num_tokens": 40106034.0, "reward": -5.960464477539063e-08, "reward_std": 0.3621934652328491, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10248418901885131, "rewards/wordcountpos_reward/raw_geo/std": 0.1645897828360318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886445, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1125.625, "completions/mean_terminated_length": 1125.625, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.1852370474094819, "frac_reward_zero_std": 0.0, "grad_norm": 3.7229536699729144, "kl": 0.0099639892578125, "learning_rate": 9.803284624762575e-07, "loss": -0.0441, "num_tokens": 40149404.0, "reward": 0.0, "reward_std": 0.6757339239120483, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03311160811618911, "rewards/wordcountpos_reward/raw_geo/std": 0.1451890887697495, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1073.5625, "completions/mean_terminated_length": 1073.5625, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.1854370874174835, "frac_reward_zero_std": 0.0, "grad_norm": 3.4811291230655197, "kl": 0.00765228271484375, "learning_rate": 9.8023646605528e-07, "loss": 0.0576, "num_tokens": 40198693.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4679914712905884, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07139827817899022, "rewards/wordcountpos_reward/raw_geo/std": 0.17635879176403244, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1156.6875, "completions/mean_terminated_length": 1156.6875, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.1856371274254851, "frac_reward_zero_std": 0.0, "grad_norm": 2.8947509616842786, "kl": 0.00762176513671875, "learning_rate": 9.801442598490485e-07, "loss": -0.0308, "num_tokens": 40248576.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8155903816223145, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03296823420794705, "rewards/wordcountpos_reward/raw_geo/std": 0.058206315890683286, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1069.25, "completions/mean_terminated_length": 1069.25, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.1858371674334867, "frac_reward_zero_std": 0.0, "grad_norm": 3.85629544001022, "kl": 0.012115478515625, "learning_rate": 9.800518439025223e-07, "loss": -0.0276, "num_tokens": 40301076.0, "reward": 1.4901161193847656e-08, "reward_std": 1.035229206085205, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024291140479526983, "rewards/wordcountpos_reward/raw_geo/std": 0.30076259184815257, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.13381856152046848, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1228.25, "completions/mean_terminated_length": 1228.25, "completions/min_length": 1063.0, "completions/min_terminated_length": 1063.0, "epoch": 0.1860372074414883, "frac_reward_zero_std": 0.0, "grad_norm": 2.6997257041282894, "kl": 0.005893707275390625, "learning_rate": 9.799592182607642e-07, "loss": 0.0115, "num_tokens": 40344344.0, "reward": 4.470348358154297e-08, "reward_std": 1.0092113018035889, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.4039932754308158, "rewards/wordcountpos_reward/raw_geo/std": 0.2504430719790084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1372.5, "completions/mean_terminated_length": 1296.0, "completions/min_length": 1178.0, "completions/min_terminated_length": 1178.0, "epoch": 0.18623724744948988, "frac_reward_zero_std": 0.0, "grad_norm": 2.2987923499829175, "kl": 0.00505828857421875, "learning_rate": 9.79866382968939e-07, "loss": 0.0133, "num_tokens": 40391952.0, "reward": 0.0, "reward_std": 0.40316373109817505, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08187932635907305, "rewards/wordcountpos_reward/raw_geo/std": 0.2504759261413003, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1154.75, "completions/mean_terminated_length": 1131.7333984375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.1864372874574915, "frac_reward_zero_std": 0.0, "grad_norm": 2.0311869702156775, "kl": 0.0056915283203125, "learning_rate": 9.797733380723133e-07, "loss": -0.0695, "num_tokens": 40424852.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5888725519180298, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03884571415396457, "rewards/wordcountpos_reward/raw_geo/std": 0.13365507589392253, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1163.9375, "completions/mean_terminated_length": 1086.3846435546875, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.1866373274654931, "frac_reward_zero_std": 0.0, "grad_norm": 2.713463046000283, "kl": 0.00612640380859375, "learning_rate": 9.796800836162565e-07, "loss": -0.049, "num_tokens": 40463659.0, "reward": 0.0, "reward_std": 0.9785711765289307, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01299969202872348, "rewards/wordcountpos_reward/raw_geo/std": 0.05868218130798983, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1033.5, "completions/mean_terminated_length": 1033.5, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.1868373674734947, "frac_reward_zero_std": 0.0, "grad_norm": 3.369930282506248, "kl": 0.01165771484375, "learning_rate": 9.795866196462397e-07, "loss": -0.0001, "num_tokens": 40511627.0, "reward": 2.9802322387695312e-08, "reward_std": 0.45107266306877136, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1155522557167349, "rewards/wordcountpos_reward/raw_geo/std": 0.1935156610313652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1143.0, "completions/max_terminated_length": 1143.0, "completions/mean_length": 811.9375, "completions/mean_terminated_length": 811.9375, "completions/min_length": 601.0, "completions/min_terminated_length": 601.0, "epoch": 0.1870374074814963, "frac_reward_zero_std": 0.0, "grad_norm": 4.163878345780283, "kl": 0.009735107421875, "learning_rate": 9.794929462078366e-07, "loss": -0.0531, "num_tokens": 40551186.0, "reward": -5.960464477539063e-08, "reward_std": 0.5556304454803467, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06432098510256544, "rewards/wordcountpos_reward/raw_geo/std": 0.1269093651061461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1059.8125, "completions/mean_terminated_length": 1059.8125, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.1872374474894979, "frac_reward_zero_std": 0.0, "grad_norm": 3.684987623639056, "kl": 0.01019287109375, "learning_rate": 9.793990633467225e-07, "loss": 0.0214, "num_tokens": 40594879.0, "reward": 3.725290298461914e-09, "reward_std": 1.0598126649856567, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.34002017180132854, "rewards/wordcountpos_reward/raw_geo/std": 0.17281953128100952, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1128.1875, "completions/mean_terminated_length": 1128.1875, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.1874374874974995, "frac_reward_zero_std": 0.0, "grad_norm": 4.202908195244333, "kl": 0.012939453125, "learning_rate": 9.793049711086754e-07, "loss": 0.021, "num_tokens": 40654610.0, "reward": 0.0, "reward_std": 0.7663412094116211, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09802861035418453, "rewards/wordcountpos_reward/raw_geo/std": 0.0984539074503435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1064.0, "completions/mean_terminated_length": 1034.933349609375, "completions/min_length": 680.0, "completions/min_terminated_length": 680.0, "epoch": 0.1876375275055011, "frac_reward_zero_std": 0.0, "grad_norm": 3.2155208794007915, "kl": 0.007045745849609375, "learning_rate": 9.79210669539575e-07, "loss": -0.0325, "num_tokens": 40688346.0, "reward": 0.0, "reward_std": 0.7037836313247681, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015032288791508627, "rewards/wordcountpos_reward/raw_geo/std": 0.060248950796143334, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 1054.0625, "completions/mean_terminated_length": 1054.0625, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.1878375675135027, "frac_reward_zero_std": 0.0, "grad_norm": 3.460942358114053, "kl": 0.0079803466796875, "learning_rate": 9.791161586854028e-07, "loss": -0.0167, "num_tokens": 40735075.0, "reward": 0.0, "reward_std": 0.7381820678710938, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17071789608775356, "rewards/wordcountpos_reward/raw_geo/std": 0.1399092621066253, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1183.0, "completions/max_terminated_length": 1183.0, "completions/mean_length": 940.375, "completions/mean_terminated_length": 940.375, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.1880376075215043, "frac_reward_zero_std": 0.0, "grad_norm": 3.9540195000080924, "kl": 0.0081939697265625, "learning_rate": 9.790214385922432e-07, "loss": 0.0089, "num_tokens": 40772721.0, "reward": 0.0, "reward_std": 0.9864704608917236, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03415326785270988, "rewards/wordcountpos_reward/raw_geo/std": 0.18909002065239966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10610965676722954, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 1002.0625, "completions/mean_terminated_length": 1002.0625, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.1882376475295059, "frac_reward_zero_std": 0.0, "grad_norm": 2.6784234338753503, "kl": 0.0067291259765625, "learning_rate": 9.789265093062822e-07, "loss": -0.0146, "num_tokens": 40810962.0, "reward": -2.0489096641540527e-08, "reward_std": 0.935067355632782, "rewards/wordcountpos_reward/mean": -2.0489096641540527e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09327132078886768, "rewards/wordcountpos_reward/raw_geo/std": 0.1269901313339403, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1299.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 1181.0, "completions/mean_terminated_length": 1181.0, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.1884376875375075, "frac_reward_zero_std": 0.0, "grad_norm": 2.3718240305837406, "kl": 0.00388336181640625, "learning_rate": 9.788313708738074e-07, "loss": -0.0341, "num_tokens": 40852498.0, "reward": 1.4901161193847656e-08, "reward_std": 0.97857666015625, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.092014345781485, "rewards/wordcountpos_reward/raw_geo/std": 0.046848672428784766, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1242.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 977.5, "completions/mean_terminated_length": 977.5, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.18863772754550912, "frac_reward_zero_std": 0.0, "grad_norm": 3.673159131719144, "kl": 0.0086212158203125, "learning_rate": 9.787360233412088e-07, "loss": -0.0065, "num_tokens": 40889730.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9617863893508911, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01499261313592456, "rewards/wordcountpos_reward/raw_geo/std": 0.06637304140611676, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337807, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1080.75, "completions/mean_terminated_length": 1080.75, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.1888377675535107, "frac_reward_zero_std": 0.0, "grad_norm": 3.7363677045967534, "kl": 0.0089263916015625, "learning_rate": 9.786404667549785e-07, "loss": -0.0154, "num_tokens": 40919758.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9655193090438843, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08338189901855494, "rewards/wordcountpos_reward/raw_geo/std": 0.08902969840183604, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.21391413992361344, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1282.5625, "completions/mean_terminated_length": 1251.5, "completions/min_length": 1068.0, "completions/min_terminated_length": 1068.0, "epoch": 0.1890378075615123, "frac_reward_zero_std": 0.0, "grad_norm": 2.9672736899275782, "kl": 0.00748443603515625, "learning_rate": 9.785447011617101e-07, "loss": 0.0227, "num_tokens": 40966631.0, "reward": 0.0, "reward_std": 0.7386121153831482, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.089826090091396, "rewards/wordcountpos_reward/raw_geo/std": 0.2264867558972907, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.17191729277636836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1176.0, "completions/max_terminated_length": 1176.0, "completions/mean_length": 875.875, "completions/mean_terminated_length": 875.875, "completions/min_length": 533.0, "completions/min_terminated_length": 533.0, "epoch": 0.1892378475695139, "frac_reward_zero_std": 0.0, "grad_norm": 3.4993930436063767, "kl": 0.0088043212890625, "learning_rate": 9.784487266080995e-07, "loss": -0.0888, "num_tokens": 41001821.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0333006381988525, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13892540733122008, "rewards/wordcountpos_reward/raw_geo/std": 0.08107529403261154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466156, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 933.75, "completions/mean_terminated_length": 933.75, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.1894378875775155, "frac_reward_zero_std": 0.0, "grad_norm": 3.2492788300126274, "kl": 0.00623321533203125, "learning_rate": 9.783525431409443e-07, "loss": -0.0341, "num_tokens": 41051601.0, "reward": 0.0, "reward_std": 0.7974460124969482, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16827035213083133, "rewards/wordcountpos_reward/raw_geo/std": 0.30028014448223206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1094.5, "completions/mean_terminated_length": 1094.5, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.1896379275855171, "frac_reward_zero_std": 0.0, "grad_norm": 3.808474183590221, "kl": 0.0098114013671875, "learning_rate": 9.78256150807144e-07, "loss": -0.0015, "num_tokens": 41103529.0, "reward": 0.0, "reward_std": 0.3778834044933319, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.020940434938996468, "rewards/wordcountpos_reward/raw_geo/std": 0.028493398935873975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999157, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1125.75, "completions/mean_terminated_length": 1072.2857666015625, "completions/min_length": 707.0, "completions/min_terminated_length": 707.0, "epoch": 0.1898379675935187, "frac_reward_zero_std": 0.0, "grad_norm": 3.4818027599143915, "kl": 0.011016845703125, "learning_rate": 9.781595496536997e-07, "loss": -0.0586, "num_tokens": 41153741.0, "reward": 5.960464477539063e-08, "reward_std": 0.6365479230880737, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.31218962735374806, "rewards/wordcountpos_reward/raw_geo/std": 0.169594822972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1158.1875, "completions/mean_terminated_length": 1135.4000244140625, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.19003800760152031, "frac_reward_zero_std": 0.0, "grad_norm": 3.337117188239104, "kl": 0.00824737548828125, "learning_rate": 9.780627397277149e-07, "loss": -0.02, "num_tokens": 41194640.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8792279958724976, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0536612397072808, "rewards/wordcountpos_reward/raw_geo/std": 0.07746358153949842, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 1025.1875, "completions/mean_terminated_length": 1025.1875, "completions/min_length": 576.0, "completions/min_terminated_length": 576.0, "epoch": 0.1902380476095219, "frac_reward_zero_std": 0.0, "grad_norm": 3.1007355978220614, "kl": 0.00705718994140625, "learning_rate": 9.779657210763944e-07, "loss": -0.0226, "num_tokens": 41231227.0, "reward": 0.0, "reward_std": 1.0548025369644165, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0045855926888992565, "rewards/wordcountpos_reward/raw_geo/std": 0.042990815802110095, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.029502040105226113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1053.6875, "completions/mean_terminated_length": 1023.9334106445312, "completions/min_length": 612.0, "completions/min_terminated_length": 612.0, "epoch": 0.1904380876175235, "frac_reward_zero_std": 0.0, "grad_norm": 3.146373689396383, "kl": 0.00689697265625, "learning_rate": 9.778684937470449e-07, "loss": -0.0315, "num_tokens": 41270774.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8776946663856506, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05510189335825958, "rewards/wordcountpos_reward/raw_geo/std": 0.20166074293095868, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.12041594578792297, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1195.125, "completions/mean_terminated_length": 1174.800048828125, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.1906381276255251, "frac_reward_zero_std": 0.0, "grad_norm": 2.025457138913446, "kl": 0.005359649658203125, "learning_rate": 9.77771057787075e-07, "loss": -0.0184, "num_tokens": 41321712.0, "reward": -2.9802322387695312e-08, "reward_std": 0.727824866771698, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2048883808902143, "rewards/wordcountpos_reward/raw_geo/std": 0.16196443135966973, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1164.375, "completions/mean_terminated_length": 1142.0, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.1908381676335267, "frac_reward_zero_std": 0.0, "grad_norm": 3.5318611129265505, "kl": 0.0095062255859375, "learning_rate": 9.776734132439948e-07, "loss": -0.0624, "num_tokens": 41365534.0, "reward": 2.9802322387695312e-08, "reward_std": 0.3782709240913391, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15518698027748226, "rewards/wordcountpos_reward/raw_geo/std": 0.1091001333583044, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.14981470036162822, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1104.5, "completions/mean_terminated_length": 1104.5, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.19103820764152832, "frac_reward_zero_std": 0.0, "grad_norm": 2.717478323221834, "kl": 0.0077667236328125, "learning_rate": 9.775755601654163e-07, "loss": -0.0202, "num_tokens": 41410966.0, "reward": 0.0, "reward_std": 0.8377324938774109, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05236426151473175, "rewards/wordcountpos_reward/raw_geo/std": 0.05389861928272634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1032.8125, "completions/mean_terminated_length": 1032.8125, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.1912382476495299, "frac_reward_zero_std": 0.0, "grad_norm": 3.5367118137997884, "kl": 0.0077667236328125, "learning_rate": 9.774774985990531e-07, "loss": -0.0195, "num_tokens": 41443851.0, "reward": 3.725290298461914e-09, "reward_std": 1.0271151065826416, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11298251456849681, "rewards/wordcountpos_reward/raw_geo/std": 0.02084135336988205, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1053.8125, "completions/mean_terminated_length": 1053.8125, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.19143828765753151, "frac_reward_zero_std": 0.0, "grad_norm": 3.1117085526957804, "kl": 0.0086822509765625, "learning_rate": 9.773792285927204e-07, "loss": 0.0119, "num_tokens": 41484328.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0307097434997559, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.043141481545671684, "rewards/wordcountpos_reward/raw_geo/std": 0.24255058215578731, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1167.4375, "completions/mean_terminated_length": 1167.4375, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.1916383276655331, "frac_reward_zero_std": 0.0, "grad_norm": 3.2129636731068816, "kl": 0.008514404296875, "learning_rate": 9.772807501943352e-07, "loss": 0.0204, "num_tokens": 41521719.0, "reward": 0.0, "reward_std": 1.0393407344818115, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12965038779104493, "rewards/wordcountpos_reward/raw_geo/std": 0.08441975781806724, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1038.1875, "completions/mean_terminated_length": 1038.1875, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.1918383676735347, "frac_reward_zero_std": 0.0, "grad_norm": 3.5629243635685652, "kl": 0.00872039794921875, "learning_rate": 9.77182063451916e-07, "loss": -0.0497, "num_tokens": 41566938.0, "reward": -1.4901161193847656e-08, "reward_std": 1.004325032234192, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07352977140174699, "rewards/wordcountpos_reward/raw_geo/std": 0.111066035024086, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1238.9375, "completions/mean_terminated_length": 1178.6923828125, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.1920384076815363, "frac_reward_zero_std": 0.0, "grad_norm": 3.0164824637685608, "kl": 0.00860595703125, "learning_rate": 9.770831684135825e-07, "loss": -0.0582, "num_tokens": 41621081.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6376367807388306, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03276566488367626, "rewards/wordcountpos_reward/raw_geo/std": 0.0983017904178729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1500617156989701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 1069.9375, "completions/mean_terminated_length": 1069.9375, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.1922384476895379, "frac_reward_zero_std": 0.0, "grad_norm": 3.521652737115484, "kl": 0.00927734375, "learning_rate": 9.76984065127557e-07, "loss": -0.0088, "num_tokens": 41667560.0, "reward": 0.0, "reward_std": 0.8463444113731384, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06925460984066141, "rewards/wordcountpos_reward/raw_geo/std": 0.4339992986635063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1083.875, "completions/mean_terminated_length": 1056.1334228515625, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.19243848769753952, "frac_reward_zero_std": 0.0, "grad_norm": 2.8058250931398803, "kl": 0.005828857421875, "learning_rate": 9.768847536421628e-07, "loss": -0.0343, "num_tokens": 41708630.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6933983564376831, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15456258207414825, "rewards/wordcountpos_reward/raw_geo/std": 0.12007180741351452, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1275.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 1119.0625, "completions/mean_terminated_length": 1119.0625, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.1926385277055411, "frac_reward_zero_std": 0.0, "grad_norm": 3.3230610852581797, "kl": 0.01111602783203125, "learning_rate": 9.76785234005824e-07, "loss": -0.0159, "num_tokens": 41757175.0, "reward": 5.960464477539063e-08, "reward_std": 0.618442952632904, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12197930537576111, "rewards/wordcountpos_reward/raw_geo/std": 0.1729320999573292, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1395.1875, "completions/mean_terminated_length": 1260.4285888671875, "completions/min_length": 1083.0, "completions/min_terminated_length": 1083.0, "epoch": 0.1928385677135427, "frac_reward_zero_std": 0.0, "grad_norm": 1.9034664701635828, "kl": 0.004024505615234375, "learning_rate": 9.76685506267067e-07, "loss": 0.0, "num_tokens": 41806586.0, "reward": -3.725290298461914e-08, "reward_std": 1.0329450368881226, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1655605184014538, "rewards/wordcountpos_reward/raw_geo/std": 0.0901731679471355, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1163.75, "completions/mean_terminated_length": 1163.75, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.1930386077215443, "frac_reward_zero_std": 0.0, "grad_norm": 3.628883578815303, "kl": 0.0093231201171875, "learning_rate": 9.765855704745196e-07, "loss": -0.0111, "num_tokens": 41844014.0, "reward": 0.0, "reward_std": 0.6979318857192993, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0696106848600084, "rewards/wordcountpos_reward/raw_geo/std": 0.2402973831267178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1080980350662545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1050.4375, "completions/mean_terminated_length": 986.21435546875, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.1932386477295459, "frac_reward_zero_std": 0.0, "grad_norm": 2.9072885721970954, "kl": 0.0061798095703125, "learning_rate": 9.764854266769112e-07, "loss": 0.0236, "num_tokens": 41876885.0, "reward": 0.0, "reward_std": 0.6620415449142456, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.011167292201370055, "rewards/wordcountpos_reward/raw_geo/std": 0.06220947820361317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1208.0, "completions/mean_length": 1078.0, "completions/mean_terminated_length": 1049.86669921875, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.19343868773754752, "frac_reward_zero_std": 0.0, "grad_norm": 3.7060119984863347, "kl": 0.01177978515625, "learning_rate": 9.763850749230719e-07, "loss": 0.0546, "num_tokens": 41916493.0, "reward": 0.0, "reward_std": 0.8707925081253052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3934134521310546, "rewards/wordcountpos_reward/raw_geo/std": 0.21077258660552445, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1142.5625, "completions/mean_terminated_length": 1142.5625, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.1936387277455491, "frac_reward_zero_std": 0.0, "grad_norm": 3.7829595182124085, "kl": 0.010772705078125, "learning_rate": 9.76284515261934e-07, "loss": 0.0307, "num_tokens": 41954942.0, "reward": 0.0, "reward_std": 0.4330425262451172, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07786518552428937, "rewards/wordcountpos_reward/raw_geo/std": 0.08364627810801072, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1340.4375, "completions/mean_terminated_length": 1267.9091796875, "completions/min_length": 1051.0, "completions/min_terminated_length": 1051.0, "epoch": 0.19383876775355072, "frac_reward_zero_std": 0.0, "grad_norm": 3.3035216691790685, "kl": 0.009368896484375, "learning_rate": 9.761837477425306e-07, "loss": 0.0093, "num_tokens": 42001525.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6716781854629517, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10570408102791175, "rewards/wordcountpos_reward/raw_geo/std": 0.10795750849376855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1061.0625, "completions/mean_terminated_length": 1061.0625, "completions/min_length": 809.0, "completions/min_terminated_length": 809.0, "epoch": 0.1940388077615523, "frac_reward_zero_std": 0.0, "grad_norm": 3.346635918510029, "kl": 0.00823974609375, "learning_rate": 9.760827724139967e-07, "loss": 0.0121, "num_tokens": 42049838.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0304864645004272, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03648364764650533, "rewards/wordcountpos_reward/raw_geo/std": 0.23760163433039144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10671873729054748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1154.0, "completions/mean_terminated_length": 1154.0, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.1942388477695539, "frac_reward_zero_std": 0.0, "grad_norm": 3.2439435691879868, "kl": 0.0102081298828125, "learning_rate": 9.75981589325568e-07, "loss": -0.0396, "num_tokens": 42099486.0, "reward": 0.0, "reward_std": 0.876253068447113, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17271538754340202, "rewards/wordcountpos_reward/raw_geo/std": 0.17509874594368177, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1234.25, "completions/mean_terminated_length": 1172.923095703125, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.19443888777755552, "frac_reward_zero_std": 0.0, "grad_norm": 3.4757502753356073, "kl": 0.009765625, "learning_rate": 9.758801985265822e-07, "loss": -0.0013, "num_tokens": 42149370.0, "reward": 0.0, "reward_std": 0.842781662940979, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11323660183352387, "rewards/wordcountpos_reward/raw_geo/std": 0.15145568012805483, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1176.625, "completions/mean_terminated_length": 1130.4285888671875, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.1946389277855571, "frac_reward_zero_std": 0.0, "grad_norm": 3.1036467203599782, "kl": 0.00916290283203125, "learning_rate": 9.757786000664776e-07, "loss": -0.0158, "num_tokens": 42205308.0, "reward": 0.0, "reward_std": 0.8360965251922607, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10275137900902076, "rewards/wordcountpos_reward/raw_geo/std": 0.06586226345953472, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1141.0, "completions/mean_terminated_length": 1117.0667724609375, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.19483896779355872, "frac_reward_zero_std": 0.0, "grad_norm": 3.021099739457683, "kl": 0.008209228515625, "learning_rate": 9.756767939947943e-07, "loss": -0.0311, "num_tokens": 42253412.0, "reward": 0.0, "reward_std": 1.019844889640808, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007511562888598632, "rewards/wordcountpos_reward/raw_geo/std": 0.06708045674482292, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363346, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1179.0, "completions/mean_terminated_length": 1179.0, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.1950390078015603, "frac_reward_zero_std": 0.0, "grad_norm": 2.854015569032638, "kl": 0.00693511962890625, "learning_rate": 9.755747803611732e-07, "loss": -0.0543, "num_tokens": 42293540.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5685156583786011, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06669527383566692, "rewards/wordcountpos_reward/raw_geo/std": 0.058346499102673276, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1072.0, "completions/max_terminated_length": 1072.0, "completions/mean_length": 925.875, "completions/mean_terminated_length": 925.875, "completions/min_length": 661.0, "completions/min_terminated_length": 661.0, "epoch": 0.19523904780956192, "frac_reward_zero_std": 0.0, "grad_norm": 4.153345392237854, "kl": 0.0108795166015625, "learning_rate": 9.754725592153568e-07, "loss": -0.0064, "num_tokens": 42330922.0, "reward": 0.0, "reward_std": 0.9093058109283447, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04304334797540349, "rewards/wordcountpos_reward/raw_geo/std": 0.15730764071942582, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1105.5, "completions/mean_terminated_length": 1105.5, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.1954390878175635, "frac_reward_zero_std": 0.0, "grad_norm": 3.661816799130481, "kl": 0.009307861328125, "learning_rate": 9.753701306071882e-07, "loss": 0.0021, "num_tokens": 42366290.0, "reward": 7.450580596923828e-09, "reward_std": 1.0274888277053833, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0812317415337402, "rewards/wordcountpos_reward/raw_geo/std": 0.05955075190937857, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1201.875, "completions/mean_terminated_length": 1201.875, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.1956391278255651, "frac_reward_zero_std": 0.0, "grad_norm": 2.8004850709686835, "kl": 0.0074462890625, "learning_rate": 9.752674945866127e-07, "loss": 0.005, "num_tokens": 42401408.0, "reward": -7.450580596923828e-09, "reward_std": 1.0198220014572144, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.023738225912251193, "rewards/wordcountpos_reward/raw_geo/std": 0.04440457900992265, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1089.1875, "completions/mean_terminated_length": 1061.800048828125, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.19583916783356672, "frac_reward_zero_std": 0.0, "grad_norm": 3.568812250465844, "kl": 0.00994873046875, "learning_rate": 9.751646512036756e-07, "loss": 0.0032, "num_tokens": 42451611.0, "reward": 0.0, "reward_std": 0.7490805387496948, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09593330105966062, "rewards/wordcountpos_reward/raw_geo/std": 0.2202655803146919, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1055.0, "completions/max_terminated_length": 1055.0, "completions/mean_length": 851.3125, "completions/mean_terminated_length": 851.3125, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.1960392078415683, "frac_reward_zero_std": 0.0, "grad_norm": 2.3782033366002433, "kl": 0.004711151123046875, "learning_rate": 9.750616005085239e-07, "loss": -0.0175, "num_tokens": 42480184.0, "reward": 5.960464477539063e-08, "reward_std": 0.8601989150047302, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0245311797473303, "rewards/wordcountpos_reward/raw_geo/std": 0.06770323783539561, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1019.75, "completions/mean_terminated_length": 1019.75, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.19623924784956992, "frac_reward_zero_std": 0.0, "grad_norm": 3.4574797523548084, "kl": 0.0111083984375, "learning_rate": 9.749583425514056e-07, "loss": -0.0184, "num_tokens": 42514812.0, "reward": 5.960464477539063e-08, "reward_std": 0.46284985542297363, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3294717789373263, "rewards/wordcountpos_reward/raw_geo/std": 0.21859223012515988, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1212.6875, "completions/mean_terminated_length": 1212.6875, "completions/min_length": 1042.0, "completions/min_terminated_length": 1042.0, "epoch": 0.1964392878575715, "frac_reward_zero_std": 0.0, "grad_norm": 2.9099196360236563, "kl": 0.007171630859375, "learning_rate": 9.748548773826699e-07, "loss": -0.0148, "num_tokens": 42563895.0, "reward": 0.0, "reward_std": 1.0514941215515137, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01889332752140014, "rewards/wordcountpos_reward/raw_geo/std": 0.07481984926625332, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1058.875, "completions/mean_terminated_length": 1058.875, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.19663932786557312, "frac_reward_zero_std": 0.0, "grad_norm": 3.151585454078633, "kl": 0.00701904296875, "learning_rate": 9.747512050527667e-07, "loss": 0.0253, "num_tokens": 42596989.0, "reward": 0.0, "reward_std": 1.046484112739563, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015493456575434527, "rewards/wordcountpos_reward/raw_geo/std": 0.15277289265175256, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1209.9375, "completions/mean_terminated_length": 1113.25, "completions/min_length": 672.0, "completions/min_terminated_length": 672.0, "epoch": 0.19683936787357473, "frac_reward_zero_std": 0.0, "grad_norm": 2.9048178232358635, "kl": 0.00804901123046875, "learning_rate": 9.746473256122473e-07, "loss": -0.034, "num_tokens": 42637820.0, "reward": 0.0, "reward_std": 0.36593806743621826, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05224381525715387, "rewards/wordcountpos_reward/raw_geo/std": 0.06349015503984301, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195009, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1152.0, "completions/mean_terminated_length": 1071.6923828125, "completions/min_length": 669.0, "completions/min_terminated_length": 669.0, "epoch": 0.1970394078815763, "frac_reward_zero_std": 0.0, "grad_norm": 3.0629351016771613, "kl": 0.0122833251953125, "learning_rate": 9.745432391117634e-07, "loss": -0.0031, "num_tokens": 42691764.0, "reward": 0.0, "reward_std": 0.6746702194213867, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.25304571711899665, "rewards/wordcountpos_reward/raw_geo/std": 0.09335789006529434, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1049.0, "completions/mean_terminated_length": 1049.0, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.19723944788957792, "frac_reward_zero_std": 0.0, "grad_norm": 3.5900638441724966, "kl": 0.0093231201171875, "learning_rate": 9.744389456020683e-07, "loss": -0.0179, "num_tokens": 42741092.0, "reward": 0.0, "reward_std": 0.5195954442024231, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03323786939776842, "rewards/wordcountpos_reward/raw_geo/std": 0.10291103956641054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1222.0625, "completions/mean_terminated_length": 1203.533447265625, "completions/min_length": 609.0, "completions/min_terminated_length": 609.0, "epoch": 0.1974394878975795, "frac_reward_zero_std": 0.0, "grad_norm": 3.2543367145183417, "kl": 0.0085906982421875, "learning_rate": 9.743344451340161e-07, "loss": -0.0615, "num_tokens": 42790005.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9430963397026062, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10766070143457532, "rewards/wordcountpos_reward/raw_geo/std": 0.12288434424109687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1415.5625, "completions/mean_terminated_length": 1377.181884765625, "completions/min_length": 1287.0, "completions/min_terminated_length": 1287.0, "epoch": 0.19763952790558112, "frac_reward_zero_std": 0.0, "grad_norm": 2.886796861628751, "kl": 0.008636474609375, "learning_rate": 9.742297377585617e-07, "loss": -0.0101, "num_tokens": 42847150.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0454461574554443, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016622545838982518, "rewards/wordcountpos_reward/raw_geo/std": 0.13484991419802797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1299.875, "completions/mean_terminated_length": 1179.800048828125, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.1978395679135827, "frac_reward_zero_std": 0.0, "grad_norm": 3.2002692872767162, "kl": 0.0091400146484375, "learning_rate": 9.741248235267608e-07, "loss": 0.0083, "num_tokens": 42893956.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9585890769958496, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.007615239840049383, "rewards/wordcountpos_reward/raw_geo/std": 0.07497166709515451, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1137.375, "completions/mean_terminated_length": 1113.2000732421875, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.19803960792158432, "frac_reward_zero_std": 0.0, "grad_norm": 3.6598295686262654, "kl": 0.0095977783203125, "learning_rate": 9.740197024897697e-07, "loss": -0.0598, "num_tokens": 42945754.0, "reward": 0.0, "reward_std": 0.5584701299667358, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.00893313560242432, "rewards/wordcountpos_reward/raw_geo/std": 0.2142660894191206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1046.875, "completions/mean_terminated_length": 1046.875, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.19823964792958593, "frac_reward_zero_std": 0.0, "grad_norm": 3.5833108644296234, "kl": 0.00970458984375, "learning_rate": 9.739143746988466e-07, "loss": -0.0391, "num_tokens": 42985264.0, "reward": -7.450580596923828e-09, "reward_std": 1.0384418964385986, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08075193953930891, "rewards/wordcountpos_reward/raw_geo/std": 0.0889943981378654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1174.0, "completions/max_terminated_length": 1174.0, "completions/mean_length": 973.25, "completions/mean_terminated_length": 973.25, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.1984396879375875, "frac_reward_zero_std": 0.0, "grad_norm": 3.838958553485212, "kl": 0.009521484375, "learning_rate": 9.738088402053494e-07, "loss": 0.0133, "num_tokens": 43027740.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6900591850280762, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.020625120433161916, "rewards/wordcountpos_reward/raw_geo/std": 0.040872144250805806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 966.0, "completions/mean_terminated_length": 966.0, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.19863972794558912, "frac_reward_zero_std": 0.0, "grad_norm": 2.942477990039526, "kl": 0.00498199462890625, "learning_rate": 9.73703099060737e-07, "loss": -0.0335, "num_tokens": 43056948.0, "reward": 2.9802322387695312e-08, "reward_std": 0.671892523765564, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1324626588364287, "rewards/wordcountpos_reward/raw_geo/std": 0.10159128676666625, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1055.25, "completions/mean_terminated_length": 1025.60009765625, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.1988397679535907, "frac_reward_zero_std": 0.0, "grad_norm": 3.1296251252511853, "kl": 0.009765625, "learning_rate": 9.735971513165697e-07, "loss": -0.0137, "num_tokens": 43107104.0, "reward": 7.450580596923828e-09, "reward_std": 1.0283710956573486, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1545300709551925, "rewards/wordcountpos_reward/raw_geo/std": 0.08963247578208394, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1269.1875, "completions/mean_terminated_length": 1269.1875, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.19903980796159232, "frac_reward_zero_std": 0.0, "grad_norm": 2.6896227126846854, "kl": 0.00626373291015625, "learning_rate": 9.734909970245076e-07, "loss": -0.0177, "num_tokens": 43160643.0, "reward": 0.0, "reward_std": 0.8295433521270752, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05391219745952789, "rewards/wordcountpos_reward/raw_geo/std": 0.03497497878269501, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1120.375, "completions/mean_terminated_length": 1120.375, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.19923984796959393, "frac_reward_zero_std": 0.0, "grad_norm": 2.2234385607897336, "kl": 0.00362396240234375, "learning_rate": 9.733846362363127e-07, "loss": -0.0124, "num_tokens": 43200889.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9960434436798096, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18255350614425478, "rewards/wordcountpos_reward/raw_geo/std": 0.10320528482418585, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1044.625, "completions/mean_terminated_length": 1044.625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.19943988797759551, "frac_reward_zero_std": 0.0, "grad_norm": 2.3706599514308855, "kl": 0.00672149658203125, "learning_rate": 9.732780690038464e-07, "loss": -0.029, "num_tokens": 43246539.0, "reward": -2.9802322387695312e-08, "reward_std": 0.806902289390564, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14570355804490545, "rewards/wordcountpos_reward/raw_geo/std": 0.2641254723789616, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1085254706406647, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1139.375, "completions/mean_terminated_length": 1115.3333740234375, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.19963992798559713, "frac_reward_zero_std": 0.0, "grad_norm": 3.49452593312421, "kl": 0.0141754150390625, "learning_rate": 9.731712953790718e-07, "loss": 0.0327, "num_tokens": 43297497.0, "reward": -2.9802322387695312e-08, "reward_std": 0.871312141418457, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.23033980866026177, "rewards/wordcountpos_reward/raw_geo/std": 0.11314929356550549, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 979.8125, "completions/mean_terminated_length": 979.8125, "completions/min_length": 670.0, "completions/min_terminated_length": 670.0, "epoch": 0.1998399679935987, "frac_reward_zero_std": 0.0, "grad_norm": 4.057703688846008, "kl": 0.0113983154296875, "learning_rate": 9.730643154140518e-07, "loss": -0.0041, "num_tokens": 43337534.0, "reward": 7.450580596923828e-09, "reward_std": 0.9890952706336975, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.000717136643856052, "rewards/wordcountpos_reward/raw_geo/std": 0.1644765721873642, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1256.3125, "completions/mean_terminated_length": 1240.0667724609375, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.20004000800160032, "frac_reward_zero_std": 0.0, "grad_norm": 3.0434314326695864, "kl": 0.00659942626953125, "learning_rate": 9.729571291609507e-07, "loss": 0.0067, "num_tokens": 43385067.0, "reward": 0.0, "reward_std": 0.6750105619430542, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.24994730968579076, "rewards/wordcountpos_reward/raw_geo/std": 0.1243578461053855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1161.0, "completions/mean_terminated_length": 1138.4000244140625, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.20024004800960193, "frac_reward_zero_std": 0.0, "grad_norm": 1.399634499785646, "kl": 0.005084991455078125, "learning_rate": 9.728497366720326e-07, "loss": -0.0336, "num_tokens": 43422891.0, "reward": 0.0, "reward_std": 0.9323999285697937, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0651619885517758, "rewards/wordcountpos_reward/raw_geo/std": 0.07319846293518127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901162, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 978.4375, "completions/mean_terminated_length": 943.6666870117188, "completions/min_length": 665.0, "completions/min_terminated_length": 665.0, "epoch": 0.20044008801760352, "frac_reward_zero_std": 0.0, "grad_norm": 3.8183821745056505, "kl": 0.0240631103515625, "learning_rate": 9.727421379996629e-07, "loss": 0.0219, "num_tokens": 43466570.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8229279518127441, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.003386494474354254, "rewards/wordcountpos_reward/raw_geo/std": 0.1337370553313024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1254.5625, "completions/mean_terminated_length": 1197.923095703125, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.20064012802560513, "frac_reward_zero_std": 0.0, "grad_norm": 3.0389045351919304, "kl": 0.009735107421875, "learning_rate": 9.72634333196307e-07, "loss": -0.0325, "num_tokens": 43520907.0, "reward": 0.0, "reward_std": 1.035268783569336, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11362823035027286, "rewards/wordcountpos_reward/raw_geo/std": 0.07524407763852438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1040.625, "completions/mean_terminated_length": 1040.625, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.20084016803360671, "frac_reward_zero_std": 0.0, "grad_norm": 3.8449276166291813, "kl": 0.01239013671875, "learning_rate": 9.72526322314531e-07, "loss": -0.0429, "num_tokens": 43569077.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0190765857696533, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012059881097390247, "rewards/wordcountpos_reward/raw_geo/std": 0.1589921796287149, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1066.375, "completions/mean_terminated_length": 1066.375, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.20104020804160833, "frac_reward_zero_std": 0.0, "grad_norm": 3.2187429289676253, "kl": 0.007488250732421875, "learning_rate": 9.724181054070018e-07, "loss": -0.0093, "num_tokens": 43606907.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6909418702125549, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2556306419603813, "rewards/wordcountpos_reward/raw_geo/std": 0.15910946882496937, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1266.4375, "completions/mean_terminated_length": 1160.272705078125, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.2012402480496099, "frac_reward_zero_std": 0.0, "grad_norm": 2.9364824537407386, "kl": 0.008056640625, "learning_rate": 9.723096825264862e-07, "loss": -0.0111, "num_tokens": 43653738.0, "reward": 0.0, "reward_std": 0.8947299718856812, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06235842115735778, "rewards/wordcountpos_reward/raw_geo/std": 0.15017209687269067, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 919.25, "completions/mean_terminated_length": 919.25, "completions/min_length": 575.0, "completions/min_terminated_length": 575.0, "epoch": 0.20144028805761152, "frac_reward_zero_std": 0.0, "grad_norm": 3.9551609060186763, "kl": 0.00917816162109375, "learning_rate": 9.722010537258516e-07, "loss": 0.013, "num_tokens": 43688534.0, "reward": -5.960464477539063e-08, "reward_std": 0.506924569606781, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15070179428562483, "rewards/wordcountpos_reward/raw_geo/std": 0.13979391716697442, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1030.8125, "completions/mean_terminated_length": 1030.8125, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.20164032806561313, "frac_reward_zero_std": 0.0, "grad_norm": 3.2433443084594336, "kl": 0.010711669921875, "learning_rate": 9.720922190580662e-07, "loss": -0.0007, "num_tokens": 43734483.0, "reward": 0.0, "reward_std": 0.9012659192085266, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06391463854055113, "rewards/wordcountpos_reward/raw_geo/std": 0.1302640900397669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1269.0, "completions/mean_terminated_length": 1253.60009765625, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.20184036807361472, "frac_reward_zero_std": 0.0, "grad_norm": 2.7286355380619396, "kl": 0.0070953369140625, "learning_rate": 9.719831785761981e-07, "loss": -0.0353, "num_tokens": 43770819.0, "reward": 0.0, "reward_std": 0.9553343057632446, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0908261959148386, "rewards/wordcountpos_reward/raw_geo/std": 0.042759784935710685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1405.375, "completions/mean_terminated_length": 1310.75, "completions/min_length": 1152.0, "completions/min_terminated_length": 1152.0, "epoch": 0.20204040808161633, "frac_reward_zero_std": 0.0, "grad_norm": 2.8465258755884864, "kl": 0.0076446533203125, "learning_rate": 9.71873932333416e-07, "loss": -0.0083, "num_tokens": 43815001.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8110127449035645, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1908153374345717, "rewards/wordcountpos_reward/raw_geo/std": 0.199833009011391, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1010 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 937.6875, "completions/mean_terminated_length": 937.6875, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.2022404480896179, "frac_reward_zero_std": 0.0, "grad_norm": 4.074079248938705, "kl": 0.0098419189453125, "learning_rate": 9.717644803829886e-07, "loss": 0.0146, "num_tokens": 43844468.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6750224232673645, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04593400618804369, "rewards/wordcountpos_reward/raw_geo/std": 0.14252343778669646, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1210295341978484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1188.0, "completions/mean_length": 1048.75, "completions/mean_terminated_length": 1018.666748046875, "completions/min_length": 756.0, "completions/min_terminated_length": 756.0, "epoch": 0.20244048809761953, "frac_reward_zero_std": 0.0, "grad_norm": 3.1035989183070685, "kl": 0.00640869140625, "learning_rate": 9.716548227782854e-07, "loss": 0.0186, "num_tokens": 43885864.0, "reward": -2.2351741790771484e-08, "reward_std": 1.048802137374878, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.021547923324218496, "rewards/wordcountpos_reward/raw_geo/std": 0.0517572886398629, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 1122.0625, "completions/mean_terminated_length": 1068.071533203125, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.20264052810562114, "frac_reward_zero_std": 0.0, "grad_norm": 3.2404770470182935, "kl": 0.0105743408203125, "learning_rate": 9.71544959572776e-07, "loss": -0.0386, "num_tokens": 43928345.0, "reward": 0.0, "reward_std": 1.0619075298309326, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0492492401782355, "rewards/wordcountpos_reward/raw_geo/std": 0.05833713263304141, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1128.375, "completions/mean_terminated_length": 1128.375, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.20284056811362272, "frac_reward_zero_std": 0.0, "grad_norm": 3.3657429378593324, "kl": 0.0091094970703125, "learning_rate": 9.7143489082003e-07, "loss": -0.0155, "num_tokens": 43975567.0, "reward": 0.0, "reward_std": 0.319256991147995, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015362074205052636, "rewards/wordcountpos_reward/raw_geo/std": 0.12340124484360218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.65, "rewards/wordcountpos_reward/raw_rule/std": 0.30453364467779376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1104.625, "completions/mean_terminated_length": 1104.625, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.20304060812162433, "frac_reward_zero_std": 0.0, "grad_norm": 3.185615311746243, "kl": 0.0096282958984375, "learning_rate": 9.713246165737177e-07, "loss": -0.0759, "num_tokens": 44017673.0, "reward": -2.9802322387695312e-08, "reward_std": 0.44372987747192383, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0797116445323041, "rewards/wordcountpos_reward/raw_geo/std": 0.07849822580631244, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 986.625, "completions/mean_terminated_length": 986.625, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.20324064812962592, "frac_reward_zero_std": 0.0, "grad_norm": 3.495395315578162, "kl": 0.00913238525390625, "learning_rate": 9.712141368876092e-07, "loss": -0.0221, "num_tokens": 44059059.0, "reward": 0.0, "reward_std": 0.6407808661460876, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0890741892342035, "rewards/wordcountpos_reward/raw_geo/std": 0.08983352967736655, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717428, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1055.5, "completions/mean_terminated_length": 1055.5, "completions/min_length": 719.0, "completions/min_terminated_length": 719.0, "epoch": 0.20344068813762753, "frac_reward_zero_std": 0.0, "grad_norm": 3.170061551755287, "kl": 0.0094146728515625, "learning_rate": 9.711034518155746e-07, "loss": -0.0622, "num_tokens": 44106723.0, "reward": 0.0, "reward_std": 0.6020821332931519, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.00011505968698227731, "rewards/wordcountpos_reward/raw_geo/std": 0.16896957555983888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.17506612507320812, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1252.75, "completions/mean_terminated_length": 1252.75, "completions/min_length": 1120.0, "completions/min_terminated_length": 1120.0, "epoch": 0.2036407281456291, "frac_reward_zero_std": 0.0, "grad_norm": 2.4621360294908987, "kl": 0.006805419921875, "learning_rate": 9.709925614115849e-07, "loss": 0.0073, "num_tokens": 44148655.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9484935998916626, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08429771219319272, "rewards/wordcountpos_reward/raw_geo/std": 0.052876669531145705, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 996.75, "completions/mean_terminated_length": 996.75, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.20384076815363072, "frac_reward_zero_std": 0.0, "grad_norm": 4.090329188622991, "kl": 0.012451171875, "learning_rate": 9.708814657297105e-07, "loss": 0.0112, "num_tokens": 44194107.0, "reward": 0.0, "reward_std": 0.880154013633728, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21787251053087683, "rewards/wordcountpos_reward/raw_geo/std": 0.13905024675194633, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941139, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1055.125, "completions/mean_terminated_length": 1055.125, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.20404080816163234, "frac_reward_zero_std": 0.0, "grad_norm": 2.317716015973491, "kl": 0.00559234619140625, "learning_rate": 9.707701648241223e-07, "loss": -0.032, "num_tokens": 44231437.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9892827868461609, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03434693083274678, "rewards/wordcountpos_reward/raw_geo/std": 0.05628299206201947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1089.125, "completions/mean_terminated_length": 1089.125, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.20424084816963392, "frac_reward_zero_std": 0.0, "grad_norm": 3.430035718638262, "kl": 0.009124755859375, "learning_rate": 9.706586587490908e-07, "loss": 0.0525, "num_tokens": 44272919.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0335841178894043, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.028271410102089327, "rewards/wordcountpos_reward/raw_geo/std": 0.097688141579826, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988657, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1074.875, "completions/mean_terminated_length": 1074.875, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.20444088817763553, "frac_reward_zero_std": 0.0, "grad_norm": 3.2780810189160525, "kl": 0.0096282958984375, "learning_rate": 9.705469475589875e-07, "loss": -0.0398, "num_tokens": 44305125.0, "reward": 0.0, "reward_std": 0.9569463729858398, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02301718987540889, "rewards/wordcountpos_reward/raw_geo/std": 0.03126932343765298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10610965676722954, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1035.6875, "completions/mean_terminated_length": 1035.6875, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.20464092818563712, "frac_reward_zero_std": 0.0, "grad_norm": 3.3608929556981004, "kl": 0.010162353515625, "learning_rate": 9.704350313082827e-07, "loss": -0.0108, "num_tokens": 44338328.0, "reward": 0.0, "reward_std": 0.9830136895179749, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09146580262743688, "rewards/wordcountpos_reward/raw_geo/std": 0.12937457719782372, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1071.3125, "completions/mean_terminated_length": 1042.7333984375, "completions/min_length": 626.0, "completions/min_terminated_length": 626.0, "epoch": 0.20484096819363873, "frac_reward_zero_std": 0.0, "grad_norm": 2.9423630011472475, "kl": 0.00988006591796875, "learning_rate": 9.703229100515476e-07, "loss": 0.0003, "num_tokens": 44385205.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9214580059051514, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11169865509064815, "rewards/wordcountpos_reward/raw_geo/std": 0.05598194963629006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1042.125, "completions/mean_terminated_length": 1011.6000366210938, "completions/min_length": 728.0, "completions/min_terminated_length": 728.0, "epoch": 0.20504100820164034, "frac_reward_zero_std": 0.0, "grad_norm": 3.2124276466546506, "kl": 0.0076141357421875, "learning_rate": 9.702105838434528e-07, "loss": -0.0065, "num_tokens": 44415439.0, "reward": 0.0, "reward_std": 0.6295967102050781, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0162787288376992, "rewards/wordcountpos_reward/raw_geo/std": 0.06378971790509128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1110.0, "completions/mean_terminated_length": 1084.0, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.20524104820964192, "frac_reward_zero_std": 0.0, "grad_norm": 3.520301643165769, "kl": 0.0111083984375, "learning_rate": 9.700980527387692e-07, "loss": 0.0043, "num_tokens": 44467287.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9457652568817139, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10087234255350139, "rewards/wordcountpos_reward/raw_geo/std": 0.07879581422583315, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1002773930432755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1159.125, "completions/mean_terminated_length": 1110.4285888671875, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.20544108821764354, "frac_reward_zero_std": 0.0, "grad_norm": 3.3295307185596497, "kl": 0.0095367431640625, "learning_rate": 9.699853167923675e-07, "loss": -0.0288, "num_tokens": 44502601.0, "reward": 0.0, "reward_std": 0.6006546020507812, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.003064591765003523, "rewards/wordcountpos_reward/raw_geo/std": 0.12383102429605307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1027 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1419.375, "completions/mean_terminated_length": 1356.6666259765625, "completions/min_length": 1125.0, "completions/min_terminated_length": 1125.0, "epoch": 0.20564112822564512, "frac_reward_zero_std": 0.0, "grad_norm": 2.963689846310938, "kl": 0.00814056396484375, "learning_rate": 9.698723760592182e-07, "loss": -0.0088, "num_tokens": 44559871.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0168511867523193, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15300859766625066, "rewards/wordcountpos_reward/raw_geo/std": 0.26536751178136914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1200.5, "completions/mean_terminated_length": 1180.533447265625, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.20584116823364673, "frac_reward_zero_std": 0.0, "grad_norm": 2.8322887391144014, "kl": 0.00760650634765625, "learning_rate": 9.697592305943917e-07, "loss": 0.0202, "num_tokens": 44601959.0, "reward": 0.0, "reward_std": 0.9511810541152954, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0730598886110834, "rewards/wordcountpos_reward/raw_geo/std": 0.0708538610963962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.1365582225578092, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1343.1875, "completions/mean_terminated_length": 1271.9091796875, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.20604120824164832, "frac_reward_zero_std": 0.0, "grad_norm": 2.4594413853019943, "kl": 0.00547027587890625, "learning_rate": 9.696458804530582e-07, "loss": -0.0158, "num_tokens": 44649010.0, "reward": -5.960464477539063e-08, "reward_std": 0.7383902072906494, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.22492243973412773, "rewards/wordcountpos_reward/raw_geo/std": 0.17503506611537023, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1030 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1259.0, "completions/max_terminated_length": 1259.0, "completions/mean_length": 982.0, "completions/mean_terminated_length": 982.0, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.20624124824964993, "frac_reward_zero_std": 0.0, "grad_norm": 3.4996242284965136, "kl": 0.011444091796875, "learning_rate": 9.69532325690488e-07, "loss": -0.058, "num_tokens": 44688690.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0064935684204102, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.024527595457799145, "rewards/wordcountpos_reward/raw_geo/std": 0.12104033671782122, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1211.0625, "completions/mean_terminated_length": 1191.800048828125, "completions/min_length": 1030.0, "completions/min_terminated_length": 1030.0, "epoch": 0.20644128825765154, "frac_reward_zero_std": 0.0, "grad_norm": 3.2057668136861093, "kl": 0.0098876953125, "learning_rate": 9.694185663620505e-07, "loss": -0.0261, "num_tokens": 44732035.0, "reward": 0.0, "reward_std": 0.9098652601242065, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.007008216224957144, "rewards/wordcountpos_reward/raw_geo/std": 0.047497767298104045, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1209.6875, "completions/mean_terminated_length": 1142.6923828125, "completions/min_length": 446.0, "completions/min_terminated_length": 446.0, "epoch": 0.20664132826565312, "frac_reward_zero_std": 0.0, "grad_norm": 3.0143958106665023, "kl": 0.0100250244140625, "learning_rate": 9.693046025232158e-07, "loss": -0.0284, "num_tokens": 44775254.0, "reward": 0.0, "reward_std": 0.7934092283248901, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0005931047853998304, "rewards/wordcountpos_reward/raw_geo/std": 0.03753031833194541, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1157.4375, "completions/mean_terminated_length": 1157.4375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.20684136827365474, "frac_reward_zero_std": 0.0, "grad_norm": 3.58827631959605, "kl": 0.0111236572265625, "learning_rate": 9.691904342295527e-07, "loss": -0.0267, "num_tokens": 44811389.0, "reward": -3.725290298461914e-08, "reward_std": 1.0079220533370972, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10497499442564695, "rewards/wordcountpos_reward/raw_geo/std": 0.06730277239524456, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857661, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1147.625, "completions/mean_terminated_length": 1147.625, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.20704140828165632, "frac_reward_zero_std": 0.0, "grad_norm": 2.612447949866124, "kl": 0.0074920654296875, "learning_rate": 9.690760615367303e-07, "loss": -0.0002, "num_tokens": 44854199.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0288931131362915, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19293829786213518, "rewards/wordcountpos_reward/raw_geo/std": 0.08432007848148097, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1106.5, "completions/mean_terminated_length": 1106.5, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.20724144828965793, "frac_reward_zero_std": 0.0, "grad_norm": 2.1016342536230606, "kl": 0.00634002685546875, "learning_rate": 9.689614845005175e-07, "loss": -0.0015, "num_tokens": 44890079.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0467946529388428, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2070276611857532, "rewards/wordcountpos_reward/raw_geo/std": 0.11631432940669552, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 917.0, "completions/max_terminated_length": 917.0, "completions/mean_length": 781.6875, "completions/mean_terminated_length": 781.6875, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.20744148829765954, "frac_reward_zero_std": 0.0, "grad_norm": 3.2564041530876815, "kl": 0.0065155029296875, "learning_rate": 9.688467031767824e-07, "loss": 0.0159, "num_tokens": 44915442.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9421567916870117, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.059829417446559764, "rewards/wordcountpos_reward/raw_geo/std": 0.06451722720566706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1175.9375, "completions/mean_terminated_length": 1129.6429443359375, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.20764152830566113, "frac_reward_zero_std": 0.0, "grad_norm": 3.356593784508273, "kl": 0.0098876953125, "learning_rate": 9.687317176214927e-07, "loss": 0.0385, "num_tokens": 44967169.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9754360318183899, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13508211703300332, "rewards/wordcountpos_reward/raw_geo/std": 0.14859961766053717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1070.6875, "completions/mean_terminated_length": 1070.6875, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.20784156831366274, "frac_reward_zero_std": 0.0, "grad_norm": 2.885257352748282, "kl": 0.00785064697265625, "learning_rate": 9.686165278907162e-07, "loss": -0.0121, "num_tokens": 44999892.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8495421409606934, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012762074037665194, "rewards/wordcountpos_reward/raw_geo/std": 0.0540356347368421, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1057.25, "completions/mean_terminated_length": 1057.25, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.20804160832166432, "frac_reward_zero_std": 0.0, "grad_norm": 3.4147544811439112, "kl": 0.01132965087890625, "learning_rate": 9.6850113404062e-07, "loss": -0.0153, "num_tokens": 45040344.0, "reward": 5.960464477539063e-08, "reward_std": 0.5505907535552979, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01923252980863088, "rewards/wordcountpos_reward/raw_geo/std": 0.07539343064477129, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 973.8125, "completions/mean_terminated_length": 973.8125, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.20824164832966593, "frac_reward_zero_std": 0.0, "grad_norm": 3.6230334080118825, "kl": 0.01043701171875, "learning_rate": 9.683855361274702e-07, "loss": -0.033, "num_tokens": 45086949.0, "reward": -5.960464477539063e-08, "reward_std": 0.8344032764434814, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08313853851249176, "rewards/wordcountpos_reward/raw_geo/std": 0.17758198901836925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1404.875, "completions/mean_terminated_length": 1373.166748046875, "completions/min_length": 1268.0, "completions/min_terminated_length": 1268.0, "epoch": 0.20844168833766755, "frac_reward_zero_std": 0.0, "grad_norm": 2.1634322477222776, "kl": 0.00661468505859375, "learning_rate": 9.68269734207633e-07, "loss": 0.0223, "num_tokens": 45137075.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6665918827056885, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08271687474049956, "rewards/wordcountpos_reward/raw_geo/std": 0.10089660794624224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1018.375, "completions/mean_terminated_length": 1018.375, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.20864172834566913, "frac_reward_zero_std": 0.0, "grad_norm": 3.402176778991871, "kl": 0.009002685546875, "learning_rate": 9.681537283375741e-07, "loss": -0.0158, "num_tokens": 45184705.0, "reward": 0.0, "reward_std": 0.9108322858810425, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.016036360514874032, "rewards/wordcountpos_reward/raw_geo/std": 0.1401057574100318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1107.8125, "completions/mean_terminated_length": 1107.8125, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.20884176835367074, "frac_reward_zero_std": 0.0, "grad_norm": 2.8765865360277427, "kl": 0.0115203857421875, "learning_rate": 9.680375185738587e-07, "loss": -0.031, "num_tokens": 45227462.0, "reward": 0.0, "reward_std": 1.0264365673065186, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06504414869856646, "rewards/wordcountpos_reward/raw_geo/std": 0.07875761126609393, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1248.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 1097.25, "completions/mean_terminated_length": 1097.25, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.20904180836167233, "frac_reward_zero_std": 0.0, "grad_norm": 2.2016970759665, "kl": 0.00402069091796875, "learning_rate": 9.67921104973151e-07, "loss": 0.0092, "num_tokens": 45268962.0, "reward": -1.1175870895385742e-08, "reward_std": 0.9274089336395264, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06010035708222863, "rewards/wordcountpos_reward/raw_geo/std": 0.3090398875540753, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1072.125, "completions/mean_terminated_length": 1072.125, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.20924184836967394, "frac_reward_zero_std": 0.0, "grad_norm": 2.9994268996822533, "kl": 0.0062255859375, "learning_rate": 9.678044875922147e-07, "loss": 0.0377, "num_tokens": 45311108.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0063072443008423, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10105689556638398, "rewards/wordcountpos_reward/raw_geo/std": 0.04508599698142703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1027.0, "completions/mean_terminated_length": 1027.0, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.20944188837767552, "frac_reward_zero_std": 0.0, "grad_norm": 2.447000251039581, "kl": 0.0064697265625, "learning_rate": 9.67687666487913e-07, "loss": -0.02, "num_tokens": 45361996.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9451016187667847, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.007728871421802886, "rewards/wordcountpos_reward/raw_geo/std": 0.2379883134382006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1246.6875, "completions/mean_terminated_length": 1162.25, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.20964192838567713, "frac_reward_zero_std": 0.0, "grad_norm": 3.241285870311567, "kl": 0.010589599609375, "learning_rate": 9.675706417172084e-07, "loss": 0.005, "num_tokens": 45414223.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8232929706573486, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08335312582719405, "rewards/wordcountpos_reward/raw_geo/std": 0.10084649643342726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1123.0, "completions/max_terminated_length": 1123.0, "completions/mean_length": 885.3125, "completions/mean_terminated_length": 885.3125, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.20984196839367875, "frac_reward_zero_std": 0.0, "grad_norm": 2.710960868451215, "kl": 0.0064849853515625, "learning_rate": 9.674534133371629e-07, "loss": -0.0018, "num_tokens": 45446644.0, "reward": 0.0, "reward_std": 0.6163842678070068, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11342590766734405, "rewards/wordcountpos_reward/raw_geo/std": 0.08481188737958653, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15098442401882486, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1281.1875, "completions/mean_terminated_length": 1230.6923828125, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.21004200840168033, "frac_reward_zero_std": 0.0, "grad_norm": 3.1793658987228026, "kl": 0.009857177734375, "learning_rate": 9.673359814049372e-07, "loss": -0.0261, "num_tokens": 45500079.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0164932012557983, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20837301836171723, "rewards/wordcountpos_reward/raw_geo/std": 0.17508007661172637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1050 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 1080.3125, "completions/mean_terminated_length": 1052.3333740234375, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.21024204840968194, "frac_reward_zero_std": 0.0, "grad_norm": 3.587978684397212, "kl": 0.0127716064453125, "learning_rate": 9.672183459777922e-07, "loss": -0.008, "num_tokens": 45540732.0, "reward": 0.0, "reward_std": 0.5821975469589233, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11284981974797713, "rewards/wordcountpos_reward/raw_geo/std": 0.06318909683715614, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043477, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1214.0, "completions/max_terminated_length": 1214.0, "completions/mean_length": 931.75, "completions/mean_terminated_length": 931.75, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.21044208841768353, "frac_reward_zero_std": 0.0, "grad_norm": 3.020482927286804, "kl": 0.00839996337890625, "learning_rate": 9.671005071130868e-07, "loss": -0.0012, "num_tokens": 45583240.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8391639590263367, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15111352624647637, "rewards/wordcountpos_reward/raw_geo/std": 0.08228366703570168, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1197.25, "completions/mean_terminated_length": 1177.0667724609375, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.21064212842568514, "frac_reward_zero_std": 0.0, "grad_norm": 2.38701586915838, "kl": 0.00493621826171875, "learning_rate": 9.669824648682805e-07, "loss": -0.0186, "num_tokens": 45620772.0, "reward": 0.0, "reward_std": 0.6755601167678833, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053968503614717674, "rewards/wordcountpos_reward/raw_geo/std": 0.051191887898063886, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1225.3125, "completions/mean_terminated_length": 1161.923095703125, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.21084216843368675, "frac_reward_zero_std": 0.0, "grad_norm": 3.4163455851709816, "kl": 0.0115966796875, "learning_rate": 9.668642193009306e-07, "loss": -0.001, "num_tokens": 45665609.0, "reward": -7.450580596923828e-09, "reward_std": 0.9238741397857666, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.13307950189102855, "rewards/wordcountpos_reward/raw_geo/std": 0.10841996812082676, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1331.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1086.625, "completions/mean_terminated_length": 1086.625, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.21104220844168833, "frac_reward_zero_std": 0.0, "grad_norm": 3.509020213821013, "kl": 0.0106658935546875, "learning_rate": 9.667457704686943e-07, "loss": -0.0135, "num_tokens": 45702547.0, "reward": 0.0, "reward_std": 0.648855984210968, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13632479433874617, "rewards/wordcountpos_reward/raw_geo/std": 0.11279095120114178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1174.0, "completions/max_terminated_length": 1174.0, "completions/mean_length": 912.0625, "completions/mean_terminated_length": 912.0625, "completions/min_length": 641.0, "completions/min_terminated_length": 641.0, "epoch": 0.21124224844968995, "frac_reward_zero_std": 0.0, "grad_norm": 2.9178123293520177, "kl": 0.00921630859375, "learning_rate": 9.66627118429328e-07, "loss": -0.0651, "num_tokens": 45735508.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6051981449127197, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14563859013122746, "rewards/wordcountpos_reward/raw_geo/std": 0.17647188123951113, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619461, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1193.625, "completions/mean_terminated_length": 1173.2000732421875, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.21144228845769153, "frac_reward_zero_std": 0.0, "grad_norm": 2.793992040655907, "kl": 0.00777435302734375, "learning_rate": 9.665082632406872e-07, "loss": 0.0167, "num_tokens": 45782486.0, "reward": 0.0, "reward_std": 0.8433632850646973, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07907572323693199, "rewards/wordcountpos_reward/raw_geo/std": 0.06316771403222832, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1070.375, "completions/mean_terminated_length": 1070.375, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.21164232846569314, "frac_reward_zero_std": 0.0, "grad_norm": 2.7798368895577505, "kl": 0.00786590576171875, "learning_rate": 9.663892049607257e-07, "loss": 0.0368, "num_tokens": 45823916.0, "reward": 0.0, "reward_std": 0.9004707336425781, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027594963536194485, "rewards/wordcountpos_reward/raw_geo/std": 0.079562450765131, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1268.5, "completions/mean_terminated_length": 1268.5, "completions/min_length": 1121.0, "completions/min_terminated_length": 1121.0, "epoch": 0.21184236847369473, "frac_reward_zero_std": 0.0, "grad_norm": 2.0835311256406075, "kl": 0.004917144775390625, "learning_rate": 9.662699436474969e-07, "loss": 0.0064, "num_tokens": 45873092.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9966124296188354, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06549685387733982, "rewards/wordcountpos_reward/raw_geo/std": 0.17348312533307966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 933.75, "completions/mean_terminated_length": 933.75, "completions/min_length": 672.0, "completions/min_terminated_length": 672.0, "epoch": 0.21204240848169634, "frac_reward_zero_std": 0.0, "grad_norm": 2.978527572282619, "kl": 0.00930023193359375, "learning_rate": 9.661504793591536e-07, "loss": 0.0002, "num_tokens": 45911520.0, "reward": -2.9802322387695312e-08, "reward_std": 0.664455235004425, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13253658253712405, "rewards/wordcountpos_reward/raw_geo/std": 0.10800361605666144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1371.625, "completions/mean_terminated_length": 1271.77783203125, "completions/min_length": 1120.0, "completions/min_terminated_length": 1120.0, "epoch": 0.21224244848969795, "frac_reward_zero_std": 0.0, "grad_norm": 2.8444264432079427, "kl": 0.0092315673828125, "learning_rate": 9.660308121539469e-07, "loss": -0.0092, "num_tokens": 45965930.0, "reward": -1.4901161193847656e-08, "reward_std": 1.000205159187317, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17549675618436897, "rewards/wordcountpos_reward/raw_geo/std": 0.2991769376030446, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1165.3125, "completions/mean_terminated_length": 964.5, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.21244248849769953, "frac_reward_zero_std": 0.0, "grad_norm": 3.591085584940392, "kl": 0.011962890625, "learning_rate": 9.659109420902268e-07, "loss": -0.0158, "num_tokens": 46016799.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0620059967041016, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14965807290379401, "rewards/wordcountpos_reward/raw_geo/std": 0.3004387441070456, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 1056.0, "completions/mean_terminated_length": 1056.0, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.21264252850570114, "frac_reward_zero_std": 0.0, "grad_norm": 3.4207288658657964, "kl": 0.00936126708984375, "learning_rate": 9.65790869226443e-07, "loss": -0.0202, "num_tokens": 46061543.0, "reward": 0.0, "reward_std": 0.8493836522102356, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13030053737032024, "rewards/wordcountpos_reward/raw_geo/std": 0.2348020547535778, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1119.0, "completions/max_terminated_length": 1119.0, "completions/mean_length": 869.0625, "completions/mean_terminated_length": 869.0625, "completions/min_length": 712.0, "completions/min_terminated_length": 712.0, "epoch": 0.21284256851370273, "frac_reward_zero_std": 0.0, "grad_norm": 3.19263064361344, "kl": 0.00640869140625, "learning_rate": 9.65670593621143e-07, "loss": 0.0674, "num_tokens": 46099832.0, "reward": 0.0, "reward_std": 1.0267257690429688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007941629760634433, "rewards/wordcountpos_reward/raw_geo/std": 0.19021647278088488, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1129.3125, "completions/mean_terminated_length": 1104.60009765625, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.21304260852170434, "frac_reward_zero_std": 0.0, "grad_norm": 3.615103622792107, "kl": 0.012664794921875, "learning_rate": 9.655501153329743e-07, "loss": -0.0711, "num_tokens": 46149597.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8551164865493774, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14384238241639116, "rewards/wordcountpos_reward/raw_geo/std": 0.11721721854152951, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1492.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1005.6875, "completions/mean_terminated_length": 1005.6875, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.21324264852970595, "frac_reward_zero_std": 0.0, "grad_norm": 3.1498245107586467, "kl": 0.00713348388671875, "learning_rate": 9.654294344206822e-07, "loss": -0.0089, "num_tokens": 46199496.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5969727039337158, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11124711081266404, "rewards/wordcountpos_reward/raw_geo/std": 0.13319919426396323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1407.625, "completions/mean_terminated_length": 1253.666748046875, "completions/min_length": 1128.0, "completions/min_terminated_length": 1128.0, "epoch": 0.21344268853770754, "frac_reward_zero_std": 0.0, "grad_norm": 2.9984404393330983, "kl": 0.0108642578125, "learning_rate": 9.653085509431115e-07, "loss": -0.0039, "num_tokens": 46253370.0, "reward": 0.0, "reward_std": 0.6834812164306641, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11845751290341999, "rewards/wordcountpos_reward/raw_geo/std": 0.06554628022720338, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1102.1875, "completions/mean_terminated_length": 1102.1875, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.21364272854570915, "frac_reward_zero_std": 0.0, "grad_norm": 2.513547314579187, "kl": 0.00943756103515625, "learning_rate": 9.651874649592055e-07, "loss": -0.0135, "num_tokens": 46300453.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8549100160598755, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08059477219930013, "rewards/wordcountpos_reward/raw_geo/std": 0.09359746601571091, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1159.9375, "completions/mean_terminated_length": 1137.2667236328125, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.21384276855371073, "frac_reward_zero_std": 0.0, "grad_norm": 3.54619867232021, "kl": 0.00970458984375, "learning_rate": 9.650661765280062e-07, "loss": 0.0268, "num_tokens": 46346956.0, "reward": 0.0, "reward_std": 1.0458364486694336, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12527216717577552, "rewards/wordcountpos_reward/raw_geo/std": 0.21419353369906438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1135.4375, "completions/mean_terminated_length": 1083.357177734375, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.21404280856171234, "frac_reward_zero_std": 0.0, "grad_norm": 2.5576607830932514, "kl": 0.00873565673828125, "learning_rate": 9.649446857086547e-07, "loss": -0.0102, "num_tokens": 46390283.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6127912998199463, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14722177944036385, "rewards/wordcountpos_reward/raw_geo/std": 0.09576126114317503, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.22273551829717486, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1070 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1341.0, "completions/mean_terminated_length": 1288.0, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.21424284856971396, "frac_reward_zero_std": 0.0, "grad_norm": 2.6071929831578657, "kl": 0.013671875, "learning_rate": 9.648229925603898e-07, "loss": -0.0413, "num_tokens": 46444107.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8557888269424438, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05484255920086129, "rewards/wordcountpos_reward/raw_geo/std": 0.06143771948901399, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1055.3125, "completions/mean_terminated_length": 991.7857666015625, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.21444288857771554, "frac_reward_zero_std": 0.0, "grad_norm": 2.652320596881037, "kl": 0.0081329345703125, "learning_rate": 9.647010971425503e-07, "loss": 0.0135, "num_tokens": 46495344.0, "reward": 0.0, "reward_std": 0.8134421110153198, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.087615328207544, "rewards/wordcountpos_reward/raw_geo/std": 0.09709196364717053, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1370.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1018.875, "completions/mean_terminated_length": 1018.875, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.21464292858571715, "frac_reward_zero_std": 0.0, "grad_norm": 3.657674774712119, "kl": 0.00905609130859375, "learning_rate": 9.645789995145727e-07, "loss": 0.0133, "num_tokens": 46525654.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9971361756324768, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10032695983383245, "rewards/wordcountpos_reward/raw_geo/std": 0.043220729766449195, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1150.5625, "completions/mean_terminated_length": 1069.923095703125, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.21484296859371874, "frac_reward_zero_std": 0.0, "grad_norm": 2.9314360367275305, "kl": 0.00862884521484375, "learning_rate": 9.644566997359924e-07, "loss": -0.0507, "num_tokens": 46560535.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9673082828521729, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05799556343012018, "rewards/wordcountpos_reward/raw_geo/std": 0.033834630381845074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1247.6875, "completions/mean_terminated_length": 1247.6875, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.21504300860172035, "frac_reward_zero_std": 0.0, "grad_norm": 3.3839645467628334, "kl": 0.0104522705078125, "learning_rate": 9.643341978664432e-07, "loss": -0.0185, "num_tokens": 46605698.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9555608630180359, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19188525105786305, "rewards/wordcountpos_reward/raw_geo/std": 0.10247647472578679, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1137.0, "completions/max_terminated_length": 1137.0, "completions/mean_length": 957.6875, "completions/mean_terminated_length": 957.6875, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.21524304860972193, "frac_reward_zero_std": 0.0, "grad_norm": 3.4530412537920028, "kl": 0.00930023193359375, "learning_rate": 9.642114939656579e-07, "loss": -0.0232, "num_tokens": 46645605.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9252154231071472, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05879350998611767, "rewards/wordcountpos_reward/raw_geo/std": 0.07150558204956681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1257.875, "completions/mean_terminated_length": 1069.5555419921875, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.21544308861772354, "frac_reward_zero_std": 0.0, "grad_norm": 3.140753769990601, "kl": 0.0104827880859375, "learning_rate": 9.64088588093467e-07, "loss": 0.0025, "num_tokens": 46693555.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9229426383972168, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0829657690297842, "rewards/wordcountpos_reward/raw_geo/std": 0.10869345599233637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1077 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1150.1875, "completions/mean_terminated_length": 1150.1875, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.21564312862572516, "frac_reward_zero_std": 0.0, "grad_norm": 2.884670916257981, "kl": 0.00891876220703125, "learning_rate": 9.639654803098003e-07, "loss": -0.0335, "num_tokens": 46731406.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9441984295845032, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020168653768786463, "rewards/wordcountpos_reward/raw_geo/std": 0.08700413303880884, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1242.8125, "completions/mean_terminated_length": 1225.666748046875, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.21584316863372674, "frac_reward_zero_std": 0.0, "grad_norm": 3.389205647697212, "kl": 0.0114288330078125, "learning_rate": 9.638421706746857e-07, "loss": 0.0108, "num_tokens": 46777259.0, "reward": 0.0, "reward_std": 0.5140067934989929, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07717342371003036, "rewards/wordcountpos_reward/raw_geo/std": 0.10192442662462703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1261.5, "completions/mean_terminated_length": 1261.5, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.21604320864172835, "frac_reward_zero_std": 0.0, "grad_norm": 2.9733405400614323, "kl": 0.0107574462890625, "learning_rate": 9.637186592482493e-07, "loss": -0.0665, "num_tokens": 46821347.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6560477018356323, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05107118868237846, "rewards/wordcountpos_reward/raw_geo/std": 0.04508344034151571, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1349.25, "completions/mean_terminated_length": 1280.727294921875, "completions/min_length": 547.0, "completions/min_terminated_length": 547.0, "epoch": 0.21624324864972994, "frac_reward_zero_std": 0.0, "grad_norm": 2.936939892332347, "kl": 0.00922393798828125, "learning_rate": 9.63594946090716e-07, "loss": -0.0265, "num_tokens": 46867023.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0533491373062134, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11389784233958072, "rewards/wordcountpos_reward/raw_geo/std": 0.09639986584671074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1063.375, "completions/mean_terminated_length": 1063.375, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.21644328865773155, "frac_reward_zero_std": 0.0, "grad_norm": 3.6741507197046364, "kl": 0.011810302734375, "learning_rate": 9.634710312624091e-07, "loss": -0.0796, "num_tokens": 46906117.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0175392627716064, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22790182344918752, "rewards/wordcountpos_reward/raw_geo/std": 0.2431798284676306, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13109227736669002, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1109.1875, "completions/mean_terminated_length": 1109.1875, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.21664332866573316, "frac_reward_zero_std": 0.0, "grad_norm": 3.7717909112352097, "kl": 0.0139617919921875, "learning_rate": 9.633469148237496e-07, "loss": -0.0517, "num_tokens": 46953760.0, "reward": 0.0, "reward_std": 0.7441670894622803, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10746334610262578, "rewards/wordcountpos_reward/raw_geo/std": 0.23615392774343336, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1233.5, "completions/mean_terminated_length": 1233.5, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.21684336867373474, "frac_reward_zero_std": 0.0, "grad_norm": 3.261071754202068, "kl": 0.0125274658203125, "learning_rate": 9.632225968352577e-07, "loss": -0.002, "num_tokens": 46997336.0, "reward": 0.0, "reward_std": 0.8631924390792847, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3820017222852813, "rewards/wordcountpos_reward/raw_geo/std": 0.1072446550454882, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454343, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1055.3125, "completions/mean_terminated_length": 1025.666748046875, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.21704340868173636, "frac_reward_zero_std": 0.0, "grad_norm": 2.7425358097889347, "kl": 0.00731658935546875, "learning_rate": 9.63098077357551e-07, "loss": -0.0496, "num_tokens": 47035333.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0211741924285889, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.25938255963840157, "rewards/wordcountpos_reward/raw_geo/std": 0.16069610745421134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1230.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 969.3125, "completions/mean_terminated_length": 969.3125, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.21724344868973794, "frac_reward_zero_std": 0.0, "grad_norm": 2.734345322927155, "kl": 0.0081024169921875, "learning_rate": 9.62973356451346e-07, "loss": -0.0055, "num_tokens": 47075010.0, "reward": 0.0, "reward_std": 0.7407355308532715, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0956907719983653, "rewards/wordcountpos_reward/raw_geo/std": 0.10088141257511246, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752093, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1393.25, "completions/mean_terminated_length": 1256.0, "completions/min_length": 1152.0, "completions/min_terminated_length": 1152.0, "epoch": 0.21744348869773955, "frac_reward_zero_std": 0.0, "grad_norm": 2.6552941463097226, "kl": 0.00748443603515625, "learning_rate": 9.62848434177457e-07, "loss": -0.0006, "num_tokens": 47131094.0, "reward": 0.0, "reward_std": 0.6017328500747681, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23561267216962592, "rewards/wordcountpos_reward/raw_geo/std": 0.3683826869711652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1390.5625, "completions/mean_terminated_length": 1305.4444580078125, "completions/min_length": 1073.0, "completions/min_terminated_length": 1073.0, "epoch": 0.21764352870574113, "frac_reward_zero_std": 0.0, "grad_norm": 1.4987613557758759, "kl": 0.002590179443359375, "learning_rate": 9.62723310596797e-07, "loss": -0.0035, "num_tokens": 47171191.0, "reward": 0.0, "reward_std": 0.7417831420898438, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06531239617328487, "rewards/wordcountpos_reward/raw_geo/std": 0.08396849305583833, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1179.4375, "completions/mean_terminated_length": 1033.727294921875, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.21784356871374275, "frac_reward_zero_std": 0.0, "grad_norm": 2.688953630068596, "kl": 0.007434844970703125, "learning_rate": 9.625979857703764e-07, "loss": 0.0315, "num_tokens": 47215454.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9893736243247986, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13179421208080838, "rewards/wordcountpos_reward/raw_geo/std": 0.15745701121512068, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1144.0, "completions/max_terminated_length": 1144.0, "completions/mean_length": 983.5, "completions/mean_terminated_length": 983.5, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.21804360872174436, "frac_reward_zero_std": 0.0, "grad_norm": 3.7875600831645992, "kl": 0.0123748779296875, "learning_rate": 9.624724597593045e-07, "loss": 0.0307, "num_tokens": 47258870.0, "reward": 4.470348358154297e-08, "reward_std": 1.0221328735351562, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.019465946730578918, "rewards/wordcountpos_reward/raw_geo/std": 0.21083970133961472, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1090 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1120.125, "completions/mean_terminated_length": 1120.125, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.21824364872974594, "frac_reward_zero_std": 0.0, "grad_norm": 3.6495935900951055, "kl": 0.0113372802734375, "learning_rate": 9.623467326247882e-07, "loss": 0.021, "num_tokens": 47302568.0, "reward": -7.450580596923828e-09, "reward_std": 1.0099070072174072, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0021527520925362863, "rewards/wordcountpos_reward/raw_geo/std": 0.21148326701724174, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12464765155042849, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1265.375, "completions/mean_terminated_length": 1249.7333984375, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.21844368873774755, "frac_reward_zero_std": 0.0, "grad_norm": 3.1059810826047887, "kl": 0.00798797607421875, "learning_rate": 9.622208044281328e-07, "loss": -0.036, "num_tokens": 47353694.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9267206192016602, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08419330559818967, "rewards/wordcountpos_reward/raw_geo/std": 0.09904491812716718, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1045.75, "completions/mean_terminated_length": 1015.4667358398438, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.21864372874574914, "frac_reward_zero_std": 0.0, "grad_norm": 3.4800212832063746, "kl": 0.0119781494140625, "learning_rate": 9.62094675230741e-07, "loss": -0.0402, "num_tokens": 47386554.0, "reward": 1.4901161193847656e-08, "reward_std": 1.021504282951355, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08419208140668184, "rewards/wordcountpos_reward/raw_geo/std": 0.1321926627557609, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1043.6875, "completions/mean_terminated_length": 978.5000610351562, "completions/min_length": 663.0, "completions/min_terminated_length": 663.0, "epoch": 0.21884376875375075, "frac_reward_zero_std": 0.0, "grad_norm": 2.997480363044305, "kl": 0.00927734375, "learning_rate": 9.619683450941146e-07, "loss": -0.05, "num_tokens": 47423021.0, "reward": 7.450580596923828e-09, "reward_std": 1.0676316022872925, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.058299689893451793, "rewards/wordcountpos_reward/raw_geo/std": 0.07477748799547167, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 969.375, "completions/mean_terminated_length": 969.375, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.21904380876175236, "frac_reward_zero_std": 0.0, "grad_norm": 3.701993884192393, "kl": 0.0095367431640625, "learning_rate": 9.61841814079852e-07, "loss": -0.0179, "num_tokens": 47464643.0, "reward": -4.470348358154297e-08, "reward_std": 1.0583786964416504, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0005548188706213583, "rewards/wordcountpos_reward/raw_geo/std": 0.0668510957510083, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1000.8125, "completions/mean_terminated_length": 1000.8125, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.21924384876975395, "frac_reward_zero_std": 0.0, "grad_norm": 3.4843043379656935, "kl": 0.0113677978515625, "learning_rate": 9.61715082249651e-07, "loss": -0.0391, "num_tokens": 47505328.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8710429668426514, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.095183079682951, "rewards/wordcountpos_reward/raw_geo/std": 0.15707658879698072, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1172998689652263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1099.625, "completions/mean_terminated_length": 1099.625, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.21944388877775556, "frac_reward_zero_std": 0.0, "grad_norm": 3.360794977203704, "kl": 0.013092041015625, "learning_rate": 9.615881496653062e-07, "loss": -0.0054, "num_tokens": 47556586.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8489280939102173, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09213657652364075, "rewards/wordcountpos_reward/raw_geo/std": 0.23457539672749986, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1119.0625, "completions/mean_terminated_length": 1119.0625, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.21964392878575714, "frac_reward_zero_std": 0.0, "grad_norm": 3.030450876480138, "kl": 0.00830078125, "learning_rate": 9.61461016388711e-07, "loss": 0.0412, "num_tokens": 47600235.0, "reward": -2.2351741790771484e-08, "reward_std": 0.8335460424423218, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00836911914178208, "rewards/wordcountpos_reward/raw_geo/std": 0.24142437106790882, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1110.0, "completions/mean_terminated_length": 1110.0, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.21984396879375875, "frac_reward_zero_std": 0.0, "grad_norm": 3.5239661581575783, "kl": 0.0141448974609375, "learning_rate": 9.613336824818555e-07, "loss": -0.0596, "num_tokens": 47651075.0, "reward": 0.0, "reward_std": 0.8938028812408447, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.005489780703130847, "rewards/wordcountpos_reward/raw_geo/std": 0.012140746747440341, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1156.0, "completions/mean_terminated_length": 1133.0667724609375, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.22004400880176037, "frac_reward_zero_std": 0.0, "grad_norm": 1.7121219347752192, "kl": 0.00433349609375, "learning_rate": 9.612061480068286e-07, "loss": -0.0351, "num_tokens": 47689667.0, "reward": 0.0, "reward_std": 0.5347951650619507, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.026385070421797985, "rewards/wordcountpos_reward/raw_geo/std": 0.13086448896849112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14240006242195888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1152.3125, "completions/mean_terminated_length": 1129.1334228515625, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.22024404880976195, "frac_reward_zero_std": 0.0, "grad_norm": 3.0789162350557917, "kl": 0.0093231201171875, "learning_rate": 9.610784130258167e-07, "loss": 0.0136, "num_tokens": 47740800.0, "reward": 0.0, "reward_std": 0.6178066730499268, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.001168259657906203, "rewards/wordcountpos_reward/raw_geo/std": 0.0663770610807529, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387149, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1116.9375, "completions/mean_terminated_length": 1091.4000244140625, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.22044408881776356, "frac_reward_zero_std": 0.0, "grad_norm": 2.46952708536387, "kl": 0.00746917724609375, "learning_rate": 9.60950477601104e-07, "loss": 0.0484, "num_tokens": 47790343.0, "reward": 0.0, "reward_std": 0.7428834438323975, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16240022521641462, "rewards/wordcountpos_reward/raw_geo/std": 0.1749434215610104, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12171612389003693, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1017.75, "completions/mean_terminated_length": 1017.75, "completions/min_length": 642.0, "completions/min_terminated_length": 642.0, "epoch": 0.22064412882576515, "frac_reward_zero_std": 0.0, "grad_norm": 2.676057969506465, "kl": 0.0108184814453125, "learning_rate": 9.608223417950724e-07, "loss": -0.0628, "num_tokens": 47829099.0, "reward": 0.0, "reward_std": 0.35619881749153137, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010071330272125869, "rewards/wordcountpos_reward/raw_geo/std": 0.07321271168142714, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1218.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 992.0625, "completions/mean_terminated_length": 992.0625, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.22084416883376676, "frac_reward_zero_std": 0.0, "grad_norm": 3.454878438968108, "kl": 0.0084991455078125, "learning_rate": 9.606940056702012e-07, "loss": 0.014, "num_tokens": 47858892.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9214839339256287, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03187644879473321, "rewards/wordcountpos_reward/raw_geo/std": 0.030046298838688503, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792518, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1086.8125, "completions/mean_terminated_length": 1027.7857666015625, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.22104420884176834, "frac_reward_zero_std": 0.0, "grad_norm": 3.5982543760672043, "kl": 0.0143280029296875, "learning_rate": 9.60565469289068e-07, "loss": 0.0386, "num_tokens": 47910249.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0246981382369995, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010324658578464603, "rewards/wordcountpos_reward/raw_geo/std": 0.09624074913485281, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1016.625, "completions/mean_terminated_length": 1016.625, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.22124424884976995, "frac_reward_zero_std": 0.0, "grad_norm": 3.3422218474105323, "kl": 0.011505126953125, "learning_rate": 9.604367327143478e-07, "loss": 0.0092, "num_tokens": 47946443.0, "reward": 0.0, "reward_std": 0.9952700734138489, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14385924480877452, "rewards/wordcountpos_reward/raw_geo/std": 0.09638461475958361, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1096.375, "completions/mean_terminated_length": 1069.4666748046875, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.22144428885777157, "frac_reward_zero_std": 0.0, "grad_norm": 3.6080633490197127, "kl": 0.0122833251953125, "learning_rate": 9.603077960088128e-07, "loss": -0.0304, "num_tokens": 47989537.0, "reward": 0.0, "reward_std": 0.7699503898620605, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0472935547124475, "rewards/wordcountpos_reward/raw_geo/std": 0.2410029442636135, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 871.6875, "completions/mean_terminated_length": 871.6875, "completions/min_length": 589.0, "completions/min_terminated_length": 589.0, "epoch": 0.22164432886577315, "frac_reward_zero_std": 0.0, "grad_norm": 4.066980926253353, "kl": 0.0133056640625, "learning_rate": 9.601786592353334e-07, "loss": -0.007, "num_tokens": 48031116.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9728751182556152, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13546114232943185, "rewards/wordcountpos_reward/raw_geo/std": 0.08543285348546935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14605934866804432, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1162.875, "completions/mean_terminated_length": 1140.4000244140625, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.22184436887377476, "frac_reward_zero_std": 0.0, "grad_norm": 3.4521268437708716, "kl": 0.0103302001953125, "learning_rate": 9.60049322456877e-07, "loss": 0.0557, "num_tokens": 48076762.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8686368465423584, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013034868752373422, "rewards/wordcountpos_reward/raw_geo/std": 0.2089271658399975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1079.4375, "completions/mean_terminated_length": 1019.357177734375, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.22204440888177635, "frac_reward_zero_std": 0.0, "grad_norm": 1.8663965121316035, "kl": 0.005329132080078125, "learning_rate": 9.599197857365091e-07, "loss": 0.0023, "num_tokens": 48110873.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9579967260360718, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08761430715655595, "rewards/wordcountpos_reward/raw_geo/std": 0.14287824230326895, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 1025.8125, "completions/mean_terminated_length": 1025.8125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.22224444888977796, "frac_reward_zero_std": 0.0, "grad_norm": 3.7588637799808393, "kl": 0.0133056640625, "learning_rate": 9.597900491373925e-07, "loss": -0.0151, "num_tokens": 48142374.0, "reward": 1.862645149230957e-09, "reward_std": 1.0671457052230835, "rewards/wordcountpos_reward/mean": 1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06101799691617067, "rewards/wordcountpos_reward/raw_geo/std": 0.05754629104442586, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1130.1875, "completions/mean_terminated_length": 1130.1875, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.22244448889777957, "frac_reward_zero_std": 0.0, "grad_norm": 2.5824466498227743, "kl": 0.00577545166015625, "learning_rate": 9.596601127227868e-07, "loss": 0.0221, "num_tokens": 48192665.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8147425055503845, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15150011814161943, "rewards/wordcountpos_reward/raw_geo/std": 0.05762348104532725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000003, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1168.9375, "completions/mean_terminated_length": 1168.9375, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.22264452890578115, "frac_reward_zero_std": 0.0, "grad_norm": 2.876198678325462, "kl": 0.0066375732421875, "learning_rate": 9.5952997655605e-07, "loss": 0.0108, "num_tokens": 48237280.0, "reward": 7.450580596923828e-09, "reward_std": 1.0668383836746216, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16322475765493205, "rewards/wordcountpos_reward/raw_geo/std": 0.07945382332079957, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1292.0625, "completions/mean_terminated_length": 1197.5455322265625, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.22284456891378276, "frac_reward_zero_std": 0.0, "grad_norm": 2.7135595372343557, "kl": 0.00738525390625, "learning_rate": 9.59399640700637e-07, "loss": 0.0526, "num_tokens": 48283553.0, "reward": 0.0, "reward_std": 0.4320484697818756, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11010380028541027, "rewards/wordcountpos_reward/raw_geo/std": 0.05293052729182484, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1047.1875, "completions/mean_terminated_length": 1047.1875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.22304460892178435, "frac_reward_zero_std": 0.0, "grad_norm": 3.4691264425482964, "kl": 0.01140594482421875, "learning_rate": 9.592691052201002e-07, "loss": 0.0508, "num_tokens": 48335100.0, "reward": 0.0, "reward_std": 0.7157114744186401, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07747688649833151, "rewards/wordcountpos_reward/raw_geo/std": 0.08424853145906115, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965647, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1278.1875, "completions/mean_terminated_length": 1263.4000244140625, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.22324464892978596, "frac_reward_zero_std": 0.0, "grad_norm": 2.9849673377282313, "kl": 0.0117340087890625, "learning_rate": 9.59138370178089e-07, "loss": 0.0211, "num_tokens": 48380975.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9721260666847229, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.060908954852252485, "rewards/wordcountpos_reward/raw_geo/std": 0.19434850795220185, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1162.75, "completions/mean_terminated_length": 1162.75, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.22344468893778754, "frac_reward_zero_std": 0.0, "grad_norm": 2.474536408317372, "kl": 0.00801849365234375, "learning_rate": 9.59007435638351e-07, "loss": 0.0025, "num_tokens": 48422395.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9989966154098511, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.018420093141459946, "rewards/wordcountpos_reward/raw_geo/std": 0.08744868483120835, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1278.125, "completions/mean_terminated_length": 1263.3333740234375, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.22364472894578916, "frac_reward_zero_std": 0.0, "grad_norm": 3.2121569806165096, "kl": 0.0118560791015625, "learning_rate": 9.588763016647298e-07, "loss": 0.022, "num_tokens": 48463317.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0513410568237305, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11370960032642238, "rewards/wordcountpos_reward/raw_geo/std": 0.10088892469762108, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1092.0625, "completions/mean_terminated_length": 1092.0625, "completions/min_length": 722.0, "completions/min_terminated_length": 722.0, "epoch": 0.22384476895379077, "frac_reward_zero_std": 0.0, "grad_norm": 3.458220733653616, "kl": 0.01068115234375, "learning_rate": 9.587449683211675e-07, "loss": 0.0009, "num_tokens": 48504526.0, "reward": 0.0, "reward_std": 0.9841119050979614, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014842705961776847, "rewards/wordcountpos_reward/raw_geo/std": 0.10813386407886666, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1034.0625, "completions/mean_terminated_length": 1034.0625, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.22404480896179235, "frac_reward_zero_std": 0.0, "grad_norm": 3.8127942821617316, "kl": 0.013946533203125, "learning_rate": 9.586134356717026e-07, "loss": -0.0119, "num_tokens": 48540343.0, "reward": 0.0, "reward_std": 0.9207268357276917, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1378950626026639, "rewards/wordcountpos_reward/raw_geo/std": 0.23956508060585524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1225.0625, "completions/mean_terminated_length": 1206.7333984375, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.22424484896979396, "frac_reward_zero_std": 0.0, "grad_norm": 2.2018069503523314, "kl": 0.00780487060546875, "learning_rate": 9.584817037804708e-07, "loss": -0.0297, "num_tokens": 48592896.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9770029187202454, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.032315323023053046, "rewards/wordcountpos_reward/raw_geo/std": 0.11925691920129869, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1025.625, "completions/mean_terminated_length": 1025.625, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.22444488897779555, "frac_reward_zero_std": 0.0, "grad_norm": 3.698286618595617, "kl": 0.011627197265625, "learning_rate": 9.583497727117054e-07, "loss": -0.0067, "num_tokens": 48628706.0, "reward": -7.450580596923828e-09, "reward_std": 1.0062321424484253, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.06503153069792313, "rewards/wordcountpos_reward/raw_geo/std": 0.08998514144249806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1307.0625, "completions/mean_terminated_length": 1242.75, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.22464492898579716, "frac_reward_zero_std": 0.0, "grad_norm": 3.273451351840682, "kl": 0.01081085205078125, "learning_rate": 9.582176425297366e-07, "loss": -0.0152, "num_tokens": 48675587.0, "reward": 5.960464477539063e-08, "reward_std": 0.9039405584335327, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10430136300178121, "rewards/wordcountpos_reward/raw_geo/std": 0.1590083314456146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 884.125, "completions/mean_terminated_length": 884.125, "completions/min_length": 595.0, "completions/min_terminated_length": 595.0, "epoch": 0.22484496899379877, "frac_reward_zero_std": 0.0, "grad_norm": 3.548644235465976, "kl": 0.013275146484375, "learning_rate": 9.580853132989916e-07, "loss": -0.0612, "num_tokens": 48704829.0, "reward": 0.0, "reward_std": 0.8891583681106567, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1963759722696244, "rewards/wordcountpos_reward/raw_geo/std": 0.20729454216894955, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1283.75, "completions/mean_terminated_length": 1067.5, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.22504500900180036, "frac_reward_zero_std": 0.0, "grad_norm": 2.9299087106163553, "kl": 0.00789642333984375, "learning_rate": 9.579527850839947e-07, "loss": -0.0191, "num_tokens": 48759401.0, "reward": 0.0, "reward_std": 0.43805375695228577, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16331836598346472, "rewards/wordcountpos_reward/raw_geo/std": 0.16847366690919163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1195.9375, "completions/mean_terminated_length": 1013.5, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.22524504900980197, "frac_reward_zero_std": 0.0, "grad_norm": 2.836403554620855, "kl": 0.01186370849609375, "learning_rate": 9.578200579493674e-07, "loss": 0.0194, "num_tokens": 48813920.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0379304885864258, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1867886348921156, "rewards/wordcountpos_reward/raw_geo/std": 0.3004409839226052, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1242.125, "completions/mean_terminated_length": 1224.933349609375, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.22544508901780355, "frac_reward_zero_std": 0.0, "grad_norm": 3.0742371251680205, "kl": 0.0081939697265625, "learning_rate": 9.57687131959828e-07, "loss": 0.0089, "num_tokens": 48854578.0, "reward": 0.0, "reward_std": 0.49281227588653564, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05675058672156692, "rewards/wordcountpos_reward/raw_geo/std": 0.2052890713261545, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1140.125, "completions/mean_terminated_length": 1140.125, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.22564512902580516, "frac_reward_zero_std": 0.0, "grad_norm": 3.464679080041118, "kl": 0.0096893310546875, "learning_rate": 9.575540071801917e-07, "loss": 0.0105, "num_tokens": 48903204.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8845804929733276, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.051096158660686286, "rewards/wordcountpos_reward/raw_geo/std": 0.10200238871034564, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1257.0625, "completions/mean_terminated_length": 1111.300048828125, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.22584516903380678, "frac_reward_zero_std": 0.0, "grad_norm": 2.0839412138352342, "kl": 0.01325225830078125, "learning_rate": 9.574206836753708e-07, "loss": 0.0181, "num_tokens": 48939773.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0020803213119507, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11956820538101501, "rewards/wordcountpos_reward/raw_geo/std": 0.06713976474032227, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1156.625, "completions/mean_terminated_length": 1156.625, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.22604520904180836, "frac_reward_zero_std": 0.0, "grad_norm": 3.1467857972762303, "kl": 0.00982666015625, "learning_rate": 9.572871615103747e-07, "loss": -0.0235, "num_tokens": 48993623.0, "reward": 0.0, "reward_std": 0.6768962740898132, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2169620158247195, "rewards/wordcountpos_reward/raw_geo/std": 0.2526734056268605, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1108.875, "completions/mean_terminated_length": 1108.875, "completions/min_length": 407.0, "completions/min_terminated_length": 407.0, "epoch": 0.22624524904980997, "frac_reward_zero_std": 0.0, "grad_norm": 2.5548248136724037, "kl": 0.00775146484375, "learning_rate": 9.57153440750309e-07, "loss": -0.0419, "num_tokens": 49035437.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6830762624740601, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015143340831155217, "rewards/wordcountpos_reward/raw_geo/std": 0.21444756602406698, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1167.8125, "completions/mean_terminated_length": 1167.8125, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.22644528905781156, "frac_reward_zero_std": 0.0, "grad_norm": 3.4320774059457047, "kl": 0.012542724609375, "learning_rate": 9.570195214603767e-07, "loss": -0.0185, "num_tokens": 49079130.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0202593803405762, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03801267092001991, "rewards/wordcountpos_reward/raw_geo/std": 0.11349513414927752, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1234.75, "completions/mean_terminated_length": 1217.0667724609375, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.22664532906581317, "frac_reward_zero_std": 0.0, "grad_norm": 3.2227580728503566, "kl": 0.011871337890625, "learning_rate": 9.568854037058776e-07, "loss": -0.0124, "num_tokens": 49121486.0, "reward": 0.0, "reward_std": 0.5776551961898804, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02580008849692016, "rewards/wordcountpos_reward/raw_geo/std": 0.28219799650593286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1179.0625, "completions/mean_terminated_length": 1157.666748046875, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.22684536907381475, "frac_reward_zero_std": 0.0, "grad_norm": 3.036328135469364, "kl": 0.0126800537109375, "learning_rate": 9.567510875522081e-07, "loss": 0.0139, "num_tokens": 49169599.0, "reward": 0.0, "reward_std": 0.4948246479034424, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14580489580228434, "rewards/wordcountpos_reward/raw_geo/std": 0.39604311801128295, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1239.4375, "completions/mean_terminated_length": 1152.5833740234375, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.22704540908181636, "frac_reward_zero_std": 0.0, "grad_norm": 2.9692546080182995, "kl": 0.0095977783203125, "learning_rate": 9.566165730648613e-07, "loss": 0.0074, "num_tokens": 49224798.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5619721412658691, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.037732782637500226, "rewards/wordcountpos_reward/raw_geo/std": 0.10634662945839567, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 849.0, "completions/mean_length": 1140.25, "completions/mean_terminated_length": 780.5, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.22724544908981797, "frac_reward_zero_std": 0.0, "grad_norm": 1.983648284010815, "kl": 0.00554656982421875, "learning_rate": 9.56481860309427e-07, "loss": -0.0273, "num_tokens": 49259602.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0375480651855469, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08967808831111865, "rewards/wordcountpos_reward/raw_geo/std": 0.04869483669544209, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1198.9375, "completions/mean_terminated_length": 1129.4615478515625, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.22744548909781956, "frac_reward_zero_std": 0.0, "grad_norm": 3.1990311145866666, "kl": 0.0085906982421875, "learning_rate": 9.563469493515917e-07, "loss": -0.0417, "num_tokens": 49312369.0, "reward": 0.0, "reward_std": 0.8134428262710571, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006953747030456145, "rewards/wordcountpos_reward/raw_geo/std": 0.07126850547692305, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.6708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12285191326386657, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1293.6875, "completions/mean_terminated_length": 1264.21435546875, "completions/min_length": 1079.0, "completions/min_terminated_length": 1079.0, "epoch": 0.22764552910582117, "frac_reward_zero_std": 0.0, "grad_norm": 2.9646538707044643, "kl": 0.009735107421875, "learning_rate": 9.562118402571387e-07, "loss": 0.0256, "num_tokens": 49366948.0, "reward": 0.0, "reward_std": 0.9062649011611938, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1755036323143243, "rewards/wordcountpos_reward/raw_geo/std": 0.13758589289456213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1038.0, "completions/mean_terminated_length": 1038.0, "completions/min_length": 634.0, "completions/min_terminated_length": 634.0, "epoch": 0.22784556911382275, "frac_reward_zero_std": 0.0, "grad_norm": 3.465762852651177, "kl": 0.0115203857421875, "learning_rate": 9.56076533091948e-07, "loss": -0.0162, "num_tokens": 49414388.0, "reward": 0.0, "reward_std": 0.8003247380256653, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.025844836836400476, "rewards/wordcountpos_reward/raw_geo/std": 0.14025250485777443, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1118.25, "completions/mean_terminated_length": 1118.25, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.22804560912182437, "frac_reward_zero_std": 0.0, "grad_norm": 2.9878308663697273, "kl": 0.0159454345703125, "learning_rate": 9.559410279219959e-07, "loss": -0.0608, "num_tokens": 49465936.0, "reward": 0.0, "reward_std": 0.9567099809646606, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13945296066673965, "rewards/wordcountpos_reward/raw_geo/std": 0.09788984981907481, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901162, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1278.1875, "completions/mean_terminated_length": 1145.0999755859375, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.22824564912982598, "frac_reward_zero_std": 0.0, "grad_norm": 2.857512459046222, "kl": 0.00829315185546875, "learning_rate": 9.55805324813355e-07, "loss": 0.0269, "num_tokens": 49502515.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8453488349914551, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01508588083925872, "rewards/wordcountpos_reward/raw_geo/std": 0.15094952534510675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14291929864761418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1179.8125, "completions/mean_terminated_length": 1179.8125, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.22844568913782756, "frac_reward_zero_std": 0.0, "grad_norm": 2.477461226157233, "kl": 0.006866455078125, "learning_rate": 9.55669423832195e-07, "loss": -0.0328, "num_tokens": 49544600.0, "reward": 0.0, "reward_std": 0.5849494934082031, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05452017226884918, "rewards/wordcountpos_reward/raw_geo/std": 0.1440268624736924, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1335.0625, "completions/mean_terminated_length": 1170.125, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.22864572914582917, "frac_reward_zero_std": 0.0, "grad_norm": 2.6088982419374096, "kl": 0.0110626220703125, "learning_rate": 9.555333250447819e-07, "loss": -0.0478, "num_tokens": 49600137.0, "reward": 0.0, "reward_std": 1.0005755424499512, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18280361337928605, "rewards/wordcountpos_reward/raw_geo/std": 0.14928929397196405, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1212.3125, "completions/mean_terminated_length": 1212.3125, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.22884576915383076, "frac_reward_zero_std": 0.0, "grad_norm": 2.4756730483559126, "kl": 0.00616455078125, "learning_rate": 9.55397028517478e-07, "loss": -0.0, "num_tokens": 49642414.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9680562019348145, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11487212273724176, "rewards/wordcountpos_reward/raw_geo/std": 0.0633203560402893, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1281.0625, "completions/mean_terminated_length": 1266.4666748046875, "completions/min_length": 1094.0, "completions/min_terminated_length": 1094.0, "epoch": 0.22904580916183237, "frac_reward_zero_std": 0.0, "grad_norm": 2.1293868259622153, "kl": 0.00514984130859375, "learning_rate": 9.552605343167422e-07, "loss": 0.0115, "num_tokens": 49691319.0, "reward": 0.0, "reward_std": 1.000067949295044, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16547145357457368, "rewards/wordcountpos_reward/raw_geo/std": 0.313838368698743, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1028.125, "completions/mean_terminated_length": 1028.125, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.22924584916983395, "frac_reward_zero_std": 0.0, "grad_norm": 3.3411597688026453, "kl": 0.010162353515625, "learning_rate": 9.551238425091295e-07, "loss": 0.0067, "num_tokens": 49731225.0, "reward": 0.0, "reward_std": 0.7595741748809814, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06714213245505474, "rewards/wordcountpos_reward/raw_geo/std": 0.0860507250350992, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1006.5625, "completions/mean_terminated_length": 1006.5625, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.22944588917783557, "frac_reward_zero_std": 0.0, "grad_norm": 3.967118853695208, "kl": 0.0144195556640625, "learning_rate": 9.549869531612918e-07, "loss": -0.0047, "num_tokens": 49766530.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0180479288101196, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10311899965987689, "rewards/wordcountpos_reward/raw_geo/std": 0.12959071565285613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1297.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 928.0, "completions/mean_terminated_length": 928.0, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.22964592918583718, "frac_reward_zero_std": 0.0, "grad_norm": 3.593591430899529, "kl": 0.00914764404296875, "learning_rate": 9.548498663399764e-07, "loss": -0.0264, "num_tokens": 49802122.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5304368734359741, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10104776365921816, "rewards/wordcountpos_reward/raw_geo/std": 0.23351574491728172, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1309.1875, "completions/mean_terminated_length": 1281.9285888671875, "completions/min_length": 1073.0, "completions/min_terminated_length": 1073.0, "epoch": 0.22984596919383876, "frac_reward_zero_std": 0.0, "grad_norm": 3.1839698434067714, "kl": 0.0119781494140625, "learning_rate": 9.54712582112028e-07, "loss": -0.0143, "num_tokens": 49855869.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9298601150512695, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04880521198151409, "rewards/wordcountpos_reward/raw_geo/std": 0.10081132530439563, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1403039029577766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1249.25, "completions/mean_terminated_length": 1232.533447265625, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.23004600920184037, "frac_reward_zero_std": 0.0, "grad_norm": 3.209259165924364, "kl": 0.0108795166015625, "learning_rate": 9.545751005443868e-07, "loss": -0.0606, "num_tokens": 49904537.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9427385330200195, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10436095468001841, "rewards/wordcountpos_reward/raw_geo/std": 0.26268860188677623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1134.6875, "completions/mean_terminated_length": 1050.3846435546875, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.23024604920984196, "frac_reward_zero_std": 0.0, "grad_norm": 3.354877516071196, "kl": 0.010467529296875, "learning_rate": 9.544374217040894e-07, "loss": 0.0049, "num_tokens": 49956292.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0051803588867188, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01190230620216923, "rewards/wordcountpos_reward/raw_geo/std": 0.191618457315505, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1101345977866612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1482.125, "completions/mean_terminated_length": 1452.3333740234375, "completions/min_length": 1389.0, "completions/min_terminated_length": 1389.0, "epoch": 0.23044608921784357, "frac_reward_zero_std": 0.0, "grad_norm": 2.6223389992079498, "kl": 0.0088348388671875, "learning_rate": 9.542995456582687e-07, "loss": 0.0033, "num_tokens": 50005134.0, "reward": 0.0, "reward_std": 0.947304368019104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12606385386716026, "rewards/wordcountpos_reward/raw_geo/std": 0.12027285791891168, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 1077.5625, "completions/mean_terminated_length": 1077.5625, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.23064612922584518, "frac_reward_zero_std": 0.0, "grad_norm": 3.0286624973166134, "kl": 0.00902557373046875, "learning_rate": 9.541614724741535e-07, "loss": 0.0104, "num_tokens": 50045975.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9524807929992676, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20306906647160788, "rewards/wordcountpos_reward/raw_geo/std": 0.18801419388294252, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1281.0, "completions/mean_terminated_length": 1266.4000244140625, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.23084616923384677, "frac_reward_zero_std": 0.0, "grad_norm": 3.049303298317739, "kl": 0.01068115234375, "learning_rate": 9.540232022190694e-07, "loss": -0.0387, "num_tokens": 50097655.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0689671039581299, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03316920708299982, "rewards/wordcountpos_reward/raw_geo/std": 0.05379554502182039, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1227.0, "completions/max_terminated_length": 1227.0, "completions/mean_length": 1061.6875, "completions/mean_terminated_length": 1061.6875, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.23104620924184838, "frac_reward_zero_std": 0.0, "grad_norm": 3.730527631698179, "kl": 0.01397705078125, "learning_rate": 9.538847349604369e-07, "loss": -0.0586, "num_tokens": 50135106.0, "reward": 2.2351741790771484e-08, "reward_std": 1.064586877822876, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06578180754015497, "rewards/wordcountpos_reward/raw_geo/std": 0.07157985794018576, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1062.9375, "completions/mean_terminated_length": 1033.800048828125, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.23124624924984996, "frac_reward_zero_std": 0.0, "grad_norm": 3.643396681482157, "kl": 0.015380859375, "learning_rate": 9.53746070765774e-07, "loss": -0.0178, "num_tokens": 50188409.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0249221324920654, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2256795445270049, "rewards/wordcountpos_reward/raw_geo/std": 0.14552988590190669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414602, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1020.625, "completions/mean_terminated_length": 1020.625, "completions/min_length": 650.0, "completions/min_terminated_length": 650.0, "epoch": 0.23144628925785157, "frac_reward_zero_std": 0.0, "grad_norm": 2.9098740253943993, "kl": 0.008544921875, "learning_rate": 9.536072097026933e-07, "loss": -0.0422, "num_tokens": 50226579.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9652504920959473, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10180699751230987, "rewards/wordcountpos_reward/raw_geo/std": 0.2258504709352916, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1088.75, "completions/mean_terminated_length": 1088.75, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.23164632926585316, "frac_reward_zero_std": 0.0, "grad_norm": 3.219279287198751, "kl": 0.011138916015625, "learning_rate": 9.534681518389045e-07, "loss": -0.0217, "num_tokens": 50269175.0, "reward": 1.4901161193847656e-08, "reward_std": 1.006312370300293, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004155747679554641, "rewards/wordcountpos_reward/raw_geo/std": 0.19588092976753227, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1086.25, "completions/mean_terminated_length": 1086.25, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.23184636927385477, "frac_reward_zero_std": 0.0, "grad_norm": 3.2809789166485213, "kl": 0.0099639892578125, "learning_rate": 9.533288972422126e-07, "loss": -0.0238, "num_tokens": 50308723.0, "reward": -3.725290298461914e-08, "reward_std": 1.0568022727966309, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02578697472644781, "rewards/wordcountpos_reward/raw_geo/std": 0.11181167160441836, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1323.75, "completions/mean_terminated_length": 1218.0, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.23204640928185638, "frac_reward_zero_std": 0.0, "grad_norm": 3.4299575068382664, "kl": 0.0163116455078125, "learning_rate": 9.531894459805192e-07, "loss": -0.0054, "num_tokens": 50361895.0, "reward": 0.0, "reward_std": 0.6450403332710266, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05441950640453444, "rewards/wordcountpos_reward/raw_geo/std": 0.0722647779333742, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 1053.375, "completions/mean_terminated_length": 1023.6000366210938, "completions/min_length": 797.0, "completions/min_terminated_length": 797.0, "epoch": 0.23224644928985796, "frac_reward_zero_std": 0.0, "grad_norm": 1.5676348590334177, "kl": 0.003814697265625, "learning_rate": 9.53049798121821e-07, "loss": -0.0067, "num_tokens": 50395189.0, "reward": 0.0, "reward_std": 0.9334632158279419, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11211344232712508, "rewards/wordcountpos_reward/raw_geo/std": 0.10919833329008882, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1163.25, "completions/mean_terminated_length": 1140.800048828125, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.23244648929785958, "frac_reward_zero_std": 0.0, "grad_norm": 3.555029809478265, "kl": 0.0130462646484375, "learning_rate": 9.52909953734211e-07, "loss": -0.0273, "num_tokens": 50443057.0, "reward": 0.0, "reward_std": 1.0423115491867065, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20747905884940018, "rewards/wordcountpos_reward/raw_geo/std": 0.18202950707904117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1236.1875, "completions/mean_terminated_length": 1218.60009765625, "completions/min_length": 988.0, "completions/min_terminated_length": 988.0, "epoch": 0.23264652930586116, "frac_reward_zero_std": 0.0, "grad_norm": 2.80659615019871, "kl": 0.0106048583984375, "learning_rate": 9.527699128858779e-07, "loss": -0.0257, "num_tokens": 50488956.0, "reward": -7.450580596923828e-09, "reward_std": 1.0094435214996338, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11969988334729055, "rewards/wordcountpos_reward/raw_geo/std": 0.24580626314050372, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1004.75, "completions/mean_terminated_length": 1004.75, "completions/min_length": 481.0, "completions/min_terminated_length": 481.0, "epoch": 0.23284656931386277, "frac_reward_zero_std": 0.0, "grad_norm": 3.5443095289446336, "kl": 0.00855255126953125, "learning_rate": 9.526296756451065e-07, "loss": -0.0046, "num_tokens": 50520216.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0298347473144531, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013478362850192306, "rewards/wordcountpos_reward/raw_geo/std": 0.03204082669449738, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 973.0, "completions/max_terminated_length": 973.0, "completions/mean_length": 826.875, "completions/mean_terminated_length": 826.875, "completions/min_length": 717.0, "completions/min_terminated_length": 717.0, "epoch": 0.23304660932186438, "frac_reward_zero_std": 0.0, "grad_norm": 3.070213035890712, "kl": 0.00722503662109375, "learning_rate": 9.524892420802769e-07, "loss": -0.026, "num_tokens": 50568742.0, "reward": 0.0, "reward_std": 0.8031182289123535, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2236438509358696, "rewards/wordcountpos_reward/raw_geo/std": 0.2675904732837485, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1257.875, "completions/mean_terminated_length": 1112.5999755859375, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.23324664932986597, "frac_reward_zero_std": 0.0, "grad_norm": 3.4068947855956675, "kl": 0.010833740234375, "learning_rate": 9.523486122598652e-07, "loss": -0.0502, "num_tokens": 50621972.0, "reward": -5.960464477539063e-08, "reward_std": 0.633068323135376, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05564615635771998, "rewards/wordcountpos_reward/raw_geo/std": 0.04240078724855802, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1202.5625, "completions/mean_terminated_length": 1202.5625, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.23344668933786758, "frac_reward_zero_std": 0.0, "grad_norm": 2.8533288749958, "kl": 0.014068603515625, "learning_rate": 9.522077862524432e-07, "loss": -0.0125, "num_tokens": 50674229.0, "reward": 0.0, "reward_std": 0.9095343351364136, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07193658663866266, "rewards/wordcountpos_reward/raw_geo/std": 0.18047118540213775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746353, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 1085.1875, "completions/mean_terminated_length": 1085.1875, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.23364672934586916, "frac_reward_zero_std": 0.0, "grad_norm": 3.5875487496981, "kl": 0.012939453125, "learning_rate": 9.520667641266781e-07, "loss": 0.0056, "num_tokens": 50719248.0, "reward": 0.0, "reward_std": 0.6857028007507324, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03702286804320375, "rewards/wordcountpos_reward/raw_geo/std": 0.138005601771996, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1257.8125, "completions/mean_terminated_length": 1257.8125, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.23384676935387078, "frac_reward_zero_std": 0.0, "grad_norm": 2.6415271546142294, "kl": 0.00934600830078125, "learning_rate": 9.519255459513332e-07, "loss": -0.0625, "num_tokens": 50772357.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9801534414291382, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08769029612081755, "rewards/wordcountpos_reward/raw_geo/std": 0.07120901126223715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941137, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1127.75, "completions/mean_terminated_length": 1127.75, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.2340468093618724, "frac_reward_zero_std": 0.0, "grad_norm": 3.276188114267022, "kl": 0.0131378173828125, "learning_rate": 9.517841317952668e-07, "loss": 0.0353, "num_tokens": 50815889.0, "reward": 0.0, "reward_std": 0.9212627410888672, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20728460579494976, "rewards/wordcountpos_reward/raw_geo/std": 0.07644931995242212, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1229.0, "completions/max_terminated_length": 1229.0, "completions/mean_length": 973.9375, "completions/mean_terminated_length": 973.9375, "completions/min_length": 669.0, "completions/min_terminated_length": 669.0, "epoch": 0.23424684936987397, "frac_reward_zero_std": 0.0, "grad_norm": 3.742404127644982, "kl": 0.0117950439453125, "learning_rate": 9.516425217274333e-07, "loss": -0.0337, "num_tokens": 50844920.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7345133423805237, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024381230816104372, "rewards/wordcountpos_reward/raw_geo/std": 0.10279194730127855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1126.0625, "completions/mean_terminated_length": 1126.0625, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.23444688937787558, "frac_reward_zero_std": 0.0, "grad_norm": 2.3929706319840025, "kl": 0.00710296630859375, "learning_rate": 9.515007158168826e-07, "loss": 0.0094, "num_tokens": 50885457.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9731065034866333, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11804107636006356, "rewards/wordcountpos_reward/raw_geo/std": 0.09032435372091181, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1235.3125, "completions/mean_terminated_length": 1147.0833740234375, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.23464692938587717, "frac_reward_zero_std": 0.0, "grad_norm": 2.934457807058159, "kl": 0.00884246826171875, "learning_rate": 9.513587141327596e-07, "loss": -0.0366, "num_tokens": 50931166.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7465544939041138, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008823395865892241, "rewards/wordcountpos_reward/raw_geo/std": 0.15549696475952096, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 940.625, "completions/mean_terminated_length": 940.625, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.23484696939387878, "frac_reward_zero_std": 0.0, "grad_norm": 4.047040221635227, "kl": 0.012542724609375, "learning_rate": 9.512165167443049e-07, "loss": -0.0133, "num_tokens": 50971176.0, "reward": 0.0, "reward_std": 0.8164693713188171, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.31286761631768606, "rewards/wordcountpos_reward/raw_geo/std": 0.23903127868433727, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1207.875, "completions/mean_terminated_length": 1188.4000244140625, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.23504700940188036, "frac_reward_zero_std": 0.0, "grad_norm": 2.9908549695390736, "kl": 0.0099945068359375, "learning_rate": 9.510741237208549e-07, "loss": -0.035, "num_tokens": 51017710.0, "reward": 0.0, "reward_std": 0.9569621682167053, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0778112573721702, "rewards/wordcountpos_reward/raw_geo/std": 0.1041567795106529, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1125.3125, "completions/mean_terminated_length": 1100.3333740234375, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.23524704940988198, "frac_reward_zero_std": 0.0, "grad_norm": 3.2818632108914927, "kl": 0.0149688720703125, "learning_rate": 9.509315351318409e-07, "loss": -0.0388, "num_tokens": 51054315.0, "reward": 2.9802322387695312e-08, "reward_std": 0.3323240876197815, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0043583424699854025, "rewards/wordcountpos_reward/raw_geo/std": 0.283483378983603, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1049.75, "completions/mean_terminated_length": 1049.75, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.2354470894178836, "frac_reward_zero_std": 0.0, "grad_norm": 3.970046876097469, "kl": 0.0124969482421875, "learning_rate": 9.507887510467898e-07, "loss": -0.0157, "num_tokens": 51091903.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8134541511535645, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10165428380277537, "rewards/wordcountpos_reward/raw_geo/std": 0.134847942665467, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1231.0, "completions/max_terminated_length": 1231.0, "completions/mean_length": 1023.9375, "completions/mean_terminated_length": 1023.9375, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.23564712942588517, "frac_reward_zero_std": 0.0, "grad_norm": 3.1221687934548075, "kl": 0.01165771484375, "learning_rate": 9.506457715353236e-07, "loss": 0.0035, "num_tokens": 51131302.0, "reward": 0.0, "reward_std": 0.8757616281509399, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10334395655738081, "rewards/wordcountpos_reward/raw_geo/std": 0.1182151653097622, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1245.8125, "completions/mean_terminated_length": 1245.8125, "completions/min_length": 1095.0, "completions/min_terminated_length": 1095.0, "epoch": 0.23584716943388678, "frac_reward_zero_std": 0.0, "grad_norm": 2.301489027494201, "kl": 0.006927490234375, "learning_rate": 9.505025966671601e-07, "loss": 0.0011, "num_tokens": 51178699.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8741211891174316, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00703365811828602, "rewards/wordcountpos_reward/raw_geo/std": 0.08853917913110107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 1001.375, "completions/mean_terminated_length": 1001.375, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.23604720944188837, "frac_reward_zero_std": 0.0, "grad_norm": 3.45187718623664, "kl": 0.010833740234375, "learning_rate": 9.503592265121117e-07, "loss": -0.075, "num_tokens": 51209609.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9848106503486633, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.037713592306805835, "rewards/wordcountpos_reward/raw_geo/std": 0.07878511564334237, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1164283279771532, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1100.125, "completions/mean_terminated_length": 1100.125, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.23624724944988998, "frac_reward_zero_std": 0.0, "grad_norm": 3.632077568255943, "kl": 0.011260986328125, "learning_rate": 9.502156611400866e-07, "loss": 0.0433, "num_tokens": 51260811.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0279598236083984, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09201749942055977, "rewards/wordcountpos_reward/raw_geo/std": 0.12349186566069431, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7999999999999999, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1077.125, "completions/mean_terminated_length": 1077.125, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.2364472894578916, "frac_reward_zero_std": 0.0, "grad_norm": 3.5967108465297017, "kl": 0.011383056640625, "learning_rate": 9.500719006210877e-07, "loss": 0.0161, "num_tokens": 51304605.0, "reward": 7.450580596923828e-09, "reward_std": 1.0358760356903076, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.04038930341256521, "rewards/wordcountpos_reward/raw_geo/std": 0.0815503655116324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0909822937597079, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1130.375, "completions/mean_terminated_length": 1130.375, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.23664732946589317, "frac_reward_zero_std": 0.0, "grad_norm": 2.578118126851255, "kl": 0.0072784423828125, "learning_rate": 9.499279450252134e-07, "loss": -0.0177, "num_tokens": 51354531.0, "reward": 0.0, "reward_std": 1.0480678081512451, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.050917432313345945, "rewards/wordcountpos_reward/raw_geo/std": 0.09279703232527604, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1222.8125, "completions/mean_terminated_length": 1158.84619140625, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.2368473694738948, "frac_reward_zero_std": 0.0, "grad_norm": 3.3309845637507802, "kl": 0.011810302734375, "learning_rate": 9.49783794422657e-07, "loss": 0.0026, "num_tokens": 51401072.0, "reward": 0.0, "reward_std": 0.8732857704162598, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.37882699098577566, "rewards/wordcountpos_reward/raw_geo/std": 0.2815182668443314, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1158.625, "completions/mean_terminated_length": 953.7999877929688, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.23704740948189637, "frac_reward_zero_std": 0.0, "grad_norm": 4.920258931475021, "kl": 0.026824951171875, "learning_rate": 9.496394488837071e-07, "loss": -0.0223, "num_tokens": 51455730.0, "reward": 0.0, "reward_std": 0.8452078104019165, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15884302512650458, "rewards/wordcountpos_reward/raw_geo/std": 0.19093395358271534, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.12816366850994054, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 1108.0625, "completions/mean_terminated_length": 1108.0625, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.23724744948989798, "frac_reward_zero_std": 0.0, "grad_norm": 3.0700632274382325, "kl": 0.010589599609375, "learning_rate": 9.494949084787472e-07, "loss": -0.0124, "num_tokens": 51503443.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9316726326942444, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00045393816161834596, "rewards/wordcountpos_reward/raw_geo/std": 0.0679674895431341, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1093.8125, "completions/mean_terminated_length": 1093.8125, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.23744748949789957, "frac_reward_zero_std": 0.0, "grad_norm": 3.3904447841287713, "kl": 0.00940704345703125, "learning_rate": 9.493501732782559e-07, "loss": 0.0042, "num_tokens": 51545072.0, "reward": 0.0, "reward_std": 0.7127798199653625, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0044994186072670915, "rewards/wordcountpos_reward/raw_geo/std": 0.14264825119462698, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1221.0625, "completions/mean_terminated_length": 1181.21435546875, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.23764752950590118, "frac_reward_zero_std": 0.0, "grad_norm": 3.580893822264323, "kl": 0.0134429931640625, "learning_rate": 9.492052433528065e-07, "loss": 0.0136, "num_tokens": 51587025.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7911562919616699, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07228127403334517, "rewards/wordcountpos_reward/raw_geo/std": 0.09628089891620452, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1261.5625, "completions/mean_terminated_length": 1118.5, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.2378475695139028, "frac_reward_zero_std": 0.0, "grad_norm": 3.0087998310251627, "kl": 0.0115814208984375, "learning_rate": 9.490601187730679e-07, "loss": 0.0073, "num_tokens": 51637242.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7797181010246277, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02010349009650849, "rewards/wordcountpos_reward/raw_geo/std": 0.1083281152268591, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1350.1875, "completions/mean_terminated_length": 1282.0909423828125, "completions/min_length": 1122.0, "completions/min_terminated_length": 1122.0, "epoch": 0.23804760952190437, "frac_reward_zero_std": 0.0, "grad_norm": 2.822008200619165, "kl": 0.0103759765625, "learning_rate": 9.489147996098031e-07, "loss": -0.0121, "num_tokens": 51683349.0, "reward": 0.0, "reward_std": 0.7279618382453918, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03702802984335253, "rewards/wordcountpos_reward/raw_geo/std": 0.04703194141491271, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1218.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 993.0625, "completions/mean_terminated_length": 993.0625, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.23824764952990599, "frac_reward_zero_std": 0.0, "grad_norm": 3.701882399841415, "kl": 0.013031005859375, "learning_rate": 9.487692859338709e-07, "loss": 0.0112, "num_tokens": 51722030.0, "reward": 0.0, "reward_std": 0.9010850191116333, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.009806227419362545, "rewards/wordcountpos_reward/raw_geo/std": 0.1905747634511497, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1117.25, "completions/mean_terminated_length": 1117.25, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.23844768953790757, "frac_reward_zero_std": 0.0, "grad_norm": 2.810505389442069, "kl": 0.00878143310546875, "learning_rate": 9.486235778162238e-07, "loss": -0.0168, "num_tokens": 51763978.0, "reward": 0.0, "reward_std": 0.5175235271453857, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14803947345642846, "rewards/wordcountpos_reward/raw_geo/std": 0.07354937170211703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0893391374565564, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1100.25, "completions/mean_terminated_length": 1100.25, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.23864772954590918, "frac_reward_zero_std": 0.0, "grad_norm": 1.4596249986709346, "kl": 0.0035524368286132812, "learning_rate": 9.484776753279101e-07, "loss": 0.0254, "num_tokens": 51805126.0, "reward": 0.0, "reward_std": 0.8161255717277527, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01029224185612476, "rewards/wordcountpos_reward/raw_geo/std": 0.09661796965647951, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1188.3125, "completions/mean_terminated_length": 1046.6363525390625, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.2388477695539108, "frac_reward_zero_std": 0.0, "grad_norm": 2.576092245152571, "kl": 0.0103759765625, "learning_rate": 9.483315785400726e-07, "loss": 0.0282, "num_tokens": 51842611.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0089211463928223, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03627399429555535, "rewards/wordcountpos_reward/raw_geo/std": 0.03640598896562315, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1213.8125, "completions/mean_terminated_length": 1147.769287109375, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.23904780956191238, "frac_reward_zero_std": 0.0, "grad_norm": 3.034171019076665, "kl": 0.0101165771484375, "learning_rate": 9.481852875239485e-07, "loss": -0.0149, "num_tokens": 51887184.0, "reward": 0.0, "reward_std": 1.0333211421966553, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12280088125068324, "rewards/wordcountpos_reward/raw_geo/std": 0.10592501670000959, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 990.75, "completions/mean_terminated_length": 990.75, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.239247849569914, "frac_reward_zero_std": 0.0, "grad_norm": 3.9208670482912544, "kl": 0.0118408203125, "learning_rate": 9.480388023508702e-07, "loss": -0.0063, "num_tokens": 51930092.0, "reward": 0.0, "reward_std": 0.8676304221153259, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.18921344064974438, "rewards/wordcountpos_reward/raw_geo/std": 0.06726169666722093, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1227.625, "completions/mean_terminated_length": 1164.769287109375, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.23944788957791557, "frac_reward_zero_std": 0.0, "grad_norm": 3.306511428173286, "kl": 0.0122222900390625, "learning_rate": 9.478921230922643e-07, "loss": 0.0163, "num_tokens": 51982774.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9336946606636047, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012036807832469078, "rewards/wordcountpos_reward/raw_geo/std": 0.06759350200397629, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1033.25, "completions/mean_terminated_length": 1033.25, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.23964792958591719, "frac_reward_zero_std": 0.0, "grad_norm": 3.050025602290657, "kl": 0.0113525390625, "learning_rate": 9.477452498196526e-07, "loss": -0.0074, "num_tokens": 52023714.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0611441135406494, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08425284284808922, "rewards/wordcountpos_reward/raw_geo/std": 0.06517950996801065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1055.25, "completions/mean_terminated_length": 1055.25, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.2398479695939188, "frac_reward_zero_std": 0.0, "grad_norm": 3.715366466434405, "kl": 0.0133819580078125, "learning_rate": 9.475981826046507e-07, "loss": -0.0157, "num_tokens": 52067366.0, "reward": 0.0, "reward_std": 0.7970370054244995, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05611734152814263, "rewards/wordcountpos_reward/raw_geo/std": 0.055541232660735886, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 952.9375, "completions/mean_terminated_length": 952.9375, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.24004800960192038, "frac_reward_zero_std": 0.0, "grad_norm": 3.8726900672960065, "kl": 0.01165771484375, "learning_rate": 9.474509215189696e-07, "loss": -0.0013, "num_tokens": 52097989.0, "reward": 0.0, "reward_std": 1.034703016281128, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1372414006268383, "rewards/wordcountpos_reward/raw_geo/std": 0.09141674453585732, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1182.1875, "completions/mean_terminated_length": 1108.84619140625, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.240248049609922, "frac_reward_zero_std": 0.0, "grad_norm": 3.1137361802696395, "kl": 0.01104736328125, "learning_rate": 9.473034666344144e-07, "loss": 0.0152, "num_tokens": 52143624.0, "reward": 0.0, "reward_std": 0.8156384229660034, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04372307530559236, "rewards/wordcountpos_reward/raw_geo/std": 0.06022969349498634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 949.875, "completions/mean_terminated_length": 949.875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.24044808961792358, "frac_reward_zero_std": 0.0, "grad_norm": 2.7111420652674334, "kl": 0.00537872314453125, "learning_rate": 9.471558180228846e-07, "loss": -0.0416, "num_tokens": 52183870.0, "reward": 0.0, "reward_std": 1.040045142173767, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11768846747825505, "rewards/wordcountpos_reward/raw_geo/std": 0.09113578164705738, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.1600347184554374, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1307.375, "completions/mean_terminated_length": 1191.800048828125, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.2406481296259252, "frac_reward_zero_std": 0.0, "grad_norm": 2.636500778830953, "kl": 0.009918212890625, "learning_rate": 9.470079757563746e-07, "loss": -0.0298, "num_tokens": 52239284.0, "reward": 0.0, "reward_std": 0.988853394985199, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1167864628785271, "rewards/wordcountpos_reward/raw_geo/std": 0.15083617038675007, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1108.625, "completions/mean_terminated_length": 1108.625, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.24084816963392677, "frac_reward_zero_std": 0.0, "grad_norm": 3.2883015875013277, "kl": 0.0121917724609375, "learning_rate": 9.468599399069729e-07, "loss": -0.0293, "num_tokens": 52283758.0, "reward": 1.862645149230957e-08, "reward_std": 1.0627440214157104, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004401873162290123, "rewards/wordcountpos_reward/raw_geo/std": 0.09510812136194687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668904, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1130.0, "completions/mean_terminated_length": 1130.0, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.24104820964192838, "frac_reward_zero_std": 0.0, "grad_norm": 2.2424861077247162, "kl": 0.00669097900390625, "learning_rate": 9.467117105468623e-07, "loss": -0.0009, "num_tokens": 52327742.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8811639547348022, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1314306789294673, "rewards/wordcountpos_reward/raw_geo/std": 0.13167323837984155, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1001.375, "completions/mean_terminated_length": 1001.375, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.24124824964993, "frac_reward_zero_std": 0.0, "grad_norm": 3.9897326750244595, "kl": 0.013275146484375, "learning_rate": 9.465632877483203e-07, "loss": 0.0176, "num_tokens": 52364948.0, "reward": 0.0, "reward_std": 0.6958979964256287, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0005988940108211079, "rewards/wordcountpos_reward/raw_geo/std": 0.06471472546620861, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1270.0625, "completions/mean_terminated_length": 1237.21435546875, "completions/min_length": 1060.0, "completions/min_terminated_length": 1060.0, "epoch": 0.24144828965793158, "frac_reward_zero_std": 0.0, "grad_norm": 1.5081031630971902, "kl": 0.00339508056640625, "learning_rate": 9.464146715837185e-07, "loss": 0.0148, "num_tokens": 52421301.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9385417699813843, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18336531727274233, "rewards/wordcountpos_reward/raw_geo/std": 0.06303006376165128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1080.1875, "completions/mean_terminated_length": 1080.1875, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.2416483296659332, "frac_reward_zero_std": 0.0, "grad_norm": 3.2180471472785657, "kl": 0.01318359375, "learning_rate": 9.462658621255226e-07, "loss": -0.0149, "num_tokens": 52463816.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7174466848373413, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09307877163419827, "rewards/wordcountpos_reward/raw_geo/std": 0.08339463691108169, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746357, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1236.125, "completions/mean_terminated_length": 1218.533447265625, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.24184836967393478, "frac_reward_zero_std": 0.0, "grad_norm": 2.7771094569910564, "kl": 0.009918212890625, "learning_rate": 9.461168594462931e-07, "loss": -0.0384, "num_tokens": 52499402.0, "reward": 0.0, "reward_std": 0.4614540636539459, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025831890907897573, "rewards/wordcountpos_reward/raw_geo/std": 0.187955319239083, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1236.1875, "completions/mean_terminated_length": 1175.3077392578125, "completions/min_length": 1072.0, "completions/min_terminated_length": 1072.0, "epoch": 0.2420484096819364, "frac_reward_zero_std": 0.0, "grad_norm": 2.29070496026933, "kl": 0.006862640380859375, "learning_rate": 9.459676636186839e-07, "loss": -0.024, "num_tokens": 52546757.0, "reward": 0.0, "reward_std": 0.7864881157875061, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0476956622955646, "rewards/wordcountpos_reward/raw_geo/std": 0.07896374044735588, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1089.75, "completions/mean_terminated_length": 1031.1429443359375, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.242248449689938, "frac_reward_zero_std": 0.0, "grad_norm": 2.393462248816949, "kl": 0.009674072265625, "learning_rate": 9.458182747154441e-07, "loss": 0.0065, "num_tokens": 52587857.0, "reward": 0.0, "reward_std": 0.5506587028503418, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04336147948570137, "rewards/wordcountpos_reward/raw_geo/std": 0.07856243535963454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1221.6875, "completions/mean_terminated_length": 1157.4615478515625, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.24244848969793958, "frac_reward_zero_std": 0.0, "grad_norm": 3.5084249508853333, "kl": 0.01727294921875, "learning_rate": 9.456686928094162e-07, "loss": -0.0174, "num_tokens": 52639788.0, "reward": 0.0, "reward_std": 0.4595772922039032, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053300381502168905, "rewards/wordcountpos_reward/raw_geo/std": 0.06024540174495668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1202.5, "completions/mean_terminated_length": 1182.666748046875, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.2426485297059412, "frac_reward_zero_std": 0.0, "grad_norm": 2.841871261585183, "kl": 0.00888824462890625, "learning_rate": 9.455189179735369e-07, "loss": 0.0094, "num_tokens": 52680164.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4850025773048401, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07535187466167527, "rewards/wordcountpos_reward/raw_geo/std": 0.08034663872131993, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.14375905768565217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1301.8125, "completions/mean_terminated_length": 1256.0770263671875, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.24284856971394278, "frac_reward_zero_std": 0.0, "grad_norm": 3.1168367910611146, "kl": 0.01312255859375, "learning_rate": 9.453689502808372e-07, "loss": -0.0337, "num_tokens": 52732921.0, "reward": 0.0, "reward_std": 0.5940068364143372, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03526778600763611, "rewards/wordcountpos_reward/raw_geo/std": 0.03642441276404843, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1153.75, "completions/mean_terminated_length": 1153.75, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.2430486097219444, "frac_reward_zero_std": 0.0, "grad_norm": 3.3543488809062207, "kl": 0.0125579833984375, "learning_rate": 9.452187898044421e-07, "loss": 0.0066, "num_tokens": 52779901.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0402662754058838, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0902334592268817, "rewards/wordcountpos_reward/raw_geo/std": 0.06315647861149118, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 856.25, "completions/mean_terminated_length": 856.25, "completions/min_length": 510.0, "completions/min_terminated_length": 510.0, "epoch": 0.24324864972994598, "frac_reward_zero_std": 0.0, "grad_norm": 3.8965223276453784, "kl": 0.0151824951171875, "learning_rate": 9.450684366175703e-07, "loss": 0.0307, "num_tokens": 52811137.0, "reward": 0.0, "reward_std": 1.0612512826919556, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.26006408819767146, "rewards/wordcountpos_reward/raw_geo/std": 0.2016617337884896, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820636, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1369.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1164.375, "completions/mean_terminated_length": 1164.375, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.2434486897379476, "frac_reward_zero_std": 0.0, "grad_norm": 3.0730703365214422, "kl": 0.0086212158203125, "learning_rate": 9.449178907935349e-07, "loss": 0.0052, "num_tokens": 52853999.0, "reward": 0.0, "reward_std": 0.770872175693512, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05712805244243435, "rewards/wordcountpos_reward/raw_geo/std": 0.0971223517825316, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1213.0, "completions/max_terminated_length": 1213.0, "completions/mean_length": 947.625, "completions/mean_terminated_length": 947.625, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.2436487297459492, "frac_reward_zero_std": 0.0, "grad_norm": 2.0875478678864225, "kl": 0.004611968994140625, "learning_rate": 9.447671524057427e-07, "loss": 0.0207, "num_tokens": 52884049.0, "reward": 0.0, "reward_std": 0.9738976955413818, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02275011329578451, "rewards/wordcountpos_reward/raw_geo/std": 0.06713969959170867, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1213.125, "completions/mean_terminated_length": 1172.1429443359375, "completions/min_length": 1061.0, "completions/min_terminated_length": 1061.0, "epoch": 0.24384876975395078, "frac_reward_zero_std": 0.0, "grad_norm": 3.310734851548052, "kl": 0.0120086669921875, "learning_rate": 9.446162215276942e-07, "loss": -0.0046, "num_tokens": 52930739.0, "reward": 0.0, "reward_std": 1.033539056777954, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1725281437757172, "rewards/wordcountpos_reward/raw_geo/std": 0.0964625911720144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05374838498865701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1215.0625, "completions/mean_terminated_length": 1196.0667724609375, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.2440488097619524, "frac_reward_zero_std": 0.0, "grad_norm": 3.484867953204045, "kl": 0.008636474609375, "learning_rate": 9.444650982329844e-07, "loss": -0.0149, "num_tokens": 52977540.0, "reward": 0.0, "reward_std": 0.3614540696144104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03156820267612672, "rewards/wordcountpos_reward/raw_geo/std": 0.18337960772275527, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1120.25, "completions/mean_terminated_length": 1094.933349609375, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.24424884976995398, "frac_reward_zero_std": 0.0, "grad_norm": 2.8357662007537283, "kl": 0.00901031494140625, "learning_rate": 9.443137825953013e-07, "loss": -0.0038, "num_tokens": 53016744.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6886307001113892, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08992545380416017, "rewards/wordcountpos_reward/raw_geo/std": 0.07444250252634478, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10809803506625448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1211.125, "completions/mean_terminated_length": 1169.857177734375, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.2444488897779556, "frac_reward_zero_std": 0.0, "grad_norm": 3.5657132310649136, "kl": 0.012237548828125, "learning_rate": 9.441622746884275e-07, "loss": 0.002, "num_tokens": 53063770.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0602158308029175, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009409237166674606, "rewards/wordcountpos_reward/raw_geo/std": 0.05274953097403563, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 1338.5625, "completions/mean_terminated_length": 1131.0, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.2446489297859572, "frac_reward_zero_std": 0.0, "grad_norm": 2.7074057172479327, "kl": 0.00815582275390625, "learning_rate": 9.440105745862385e-07, "loss": -0.0038, "num_tokens": 53113579.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7996894121170044, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07257869077918999, "rewards/wordcountpos_reward/raw_geo/std": 0.06466109994840634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14580555290954889, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1226.0, "completions/mean_terminated_length": 1226.0, "completions/min_length": 1092.0, "completions/min_terminated_length": 1092.0, "epoch": 0.2448489697939588, "frac_reward_zero_std": 0.0, "grad_norm": 3.004768808105074, "kl": 0.01123046875, "learning_rate": 9.438586823627042e-07, "loss": -0.0177, "num_tokens": 53156339.0, "reward": -7.450580596923828e-09, "reward_std": 0.9870691299438477, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07950870546022343, "rewards/wordcountpos_reward/raw_geo/std": 0.0747406899854144, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1167460047694551, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1177.4375, "completions/mean_terminated_length": 1131.357177734375, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.2450490098019604, "frac_reward_zero_std": 0.0, "grad_norm": 3.1225918469890233, "kl": 0.0121307373046875, "learning_rate": 9.43706598091888e-07, "loss": -0.0594, "num_tokens": 53209250.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8248011469841003, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06540694784462482, "rewards/wordcountpos_reward/raw_geo/std": 0.19245578448302406, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1200.875, "completions/mean_terminated_length": 1180.933349609375, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.24524904980996198, "frac_reward_zero_std": 0.0, "grad_norm": 3.3884814515719093, "kl": 0.01165771484375, "learning_rate": 9.435543218479467e-07, "loss": 0.033, "num_tokens": 53257056.0, "reward": 0.0, "reward_std": 0.8607234954833984, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22442087104005376, "rewards/wordcountpos_reward/raw_geo/std": 0.13746074123743787, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1548595540529595, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1104.625, "completions/mean_terminated_length": 1104.625, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.2454490898179636, "frac_reward_zero_std": 0.0, "grad_norm": 3.0764641261500683, "kl": 0.0107421875, "learning_rate": 9.43401853705131e-07, "loss": -0.0119, "num_tokens": 53309194.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9115550518035889, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10273402680658004, "rewards/wordcountpos_reward/raw_geo/std": 0.061602003513957164, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1171.0, "completions/max_terminated_length": 1171.0, "completions/mean_length": 810.0625, "completions/mean_terminated_length": 810.0625, "completions/min_length": 618.0, "completions/min_terminated_length": 618.0, "epoch": 0.2456491298259652, "frac_reward_zero_std": 0.0, "grad_norm": 4.082409206758194, "kl": 0.0122222900390625, "learning_rate": 9.432491937377851e-07, "loss": -0.0203, "num_tokens": 53346859.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0469852685928345, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03824039220995795, "rewards/wordcountpos_reward/raw_geo/std": 0.09267537234211883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.14981470036162822, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1248.875, "completions/mean_terminated_length": 1232.1334228515625, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.2458491698339668, "frac_reward_zero_std": 0.0, "grad_norm": 3.1504969618980527, "kl": 0.0100860595703125, "learning_rate": 9.430963420203465e-07, "loss": 0.0065, "num_tokens": 53395777.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0379801988601685, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1040317113317454, "rewards/wordcountpos_reward/raw_geo/std": 0.047444949142271924, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0825518916489187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1131.0, "completions/mean_terminated_length": 1045.84619140625, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.2460492098419684, "frac_reward_zero_std": 0.0, "grad_norm": 3.105931406873175, "kl": 0.01165771484375, "learning_rate": 9.429432986273465e-07, "loss": 0.026, "num_tokens": 53438393.0, "reward": 0.0, "reward_std": 0.6744869947433472, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05555753008665379, "rewards/wordcountpos_reward/raw_geo/std": 0.09988069993565164, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1205.0, "completions/mean_terminated_length": 1205.0, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.24624924984997, "frac_reward_zero_std": 0.0, "grad_norm": 2.7768386273106844, "kl": 0.0096893310546875, "learning_rate": 9.427900636334098e-07, "loss": -0.0085, "num_tokens": 53482937.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0528334379196167, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19000993407122252, "rewards/wordcountpos_reward/raw_geo/std": 0.07636180337848633, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1169.0, "completions/max_terminated_length": 1169.0, "completions/mean_length": 921.375, "completions/mean_terminated_length": 921.375, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.2464492898579716, "frac_reward_zero_std": 0.0, "grad_norm": 3.955997351042825, "kl": 0.0114593505859375, "learning_rate": 9.426366371132546e-07, "loss": -0.0156, "num_tokens": 53512519.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9775012731552124, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.043462585574213515, "rewards/wordcountpos_reward/raw_geo/std": 0.05402398330396254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1189.5, "completions/mean_terminated_length": 1189.5, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.24664932986597318, "frac_reward_zero_std": 0.0, "grad_norm": 2.6943663326354215, "kl": 0.0107574462890625, "learning_rate": 9.42483019141692e-07, "loss": -0.0201, "num_tokens": 53555799.0, "reward": 2.9802322387695312e-08, "reward_std": 0.673210859298706, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02774593795186238, "rewards/wordcountpos_reward/raw_geo/std": 0.04204195327686426, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1096.5, "completions/mean_terminated_length": 1096.5, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.2468493698739748, "frac_reward_zero_std": 0.0, "grad_norm": 3.634829535697307, "kl": 0.013519287109375, "learning_rate": 9.423292097936272e-07, "loss": 0.027, "num_tokens": 53598943.0, "reward": 0.0, "reward_std": 1.0042479038238525, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07921791030444014, "rewards/wordcountpos_reward/raw_geo/std": 0.13866086258010044, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978232, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1048.875, "completions/mean_terminated_length": 1048.875, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.2470494098819764, "frac_reward_zero_std": 0.0, "grad_norm": 3.490694987190087, "kl": 0.0132904052734375, "learning_rate": 9.421752091440581e-07, "loss": -0.0143, "num_tokens": 53635789.0, "reward": 0.0, "reward_std": 0.673473596572876, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2077029319305134, "rewards/wordcountpos_reward/raw_geo/std": 0.09350673255412578, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1267.25, "completions/mean_terminated_length": 1234.0, "completions/min_length": 1059.0, "completions/min_terminated_length": 1059.0, "epoch": 0.247249449889978, "frac_reward_zero_std": 0.0, "grad_norm": 2.815308130377926, "kl": 0.01129150390625, "learning_rate": 9.420210172680762e-07, "loss": -0.0279, "num_tokens": 53687441.0, "reward": 4.470348358154297e-08, "reward_std": 0.9420638084411621, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1221136587499842, "rewards/wordcountpos_reward/raw_geo/std": 0.2998261790636444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901158, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1282.375, "completions/mean_terminated_length": 1282.375, "completions/min_length": 1061.0, "completions/min_terminated_length": 1061.0, "epoch": 0.2474494898979796, "frac_reward_zero_std": 0.0, "grad_norm": 3.0969937632608437, "kl": 0.0116119384765625, "learning_rate": 9.418666342408662e-07, "loss": -0.0095, "num_tokens": 53730239.0, "reward": 0.0, "reward_std": 0.6483302116394043, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07912510687849839, "rewards/wordcountpos_reward/raw_geo/std": 0.08930312343390825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1170.375, "completions/mean_terminated_length": 1170.375, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.24764952990598119, "frac_reward_zero_std": 0.0, "grad_norm": 3.2990938862200676, "kl": 0.0114593505859375, "learning_rate": 9.41712060137706e-07, "loss": 0.031, "num_tokens": 53775149.0, "reward": 0.0, "reward_std": 0.9025614261627197, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14336716527511156, "rewards/wordcountpos_reward/raw_geo/std": 0.06975431090624587, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1264.125, "completions/mean_terminated_length": 1264.125, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.2478495699139828, "frac_reward_zero_std": 0.0, "grad_norm": 3.0643814018326276, "kl": 0.012725830078125, "learning_rate": 9.415572950339664e-07, "loss": -0.0269, "num_tokens": 53813991.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8022619485855103, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08465348038005661, "rewards/wordcountpos_reward/raw_geo/std": 0.09597399731325108, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 897.8125, "completions/mean_terminated_length": 897.8125, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.2480496099219844, "frac_reward_zero_std": 0.0, "grad_norm": 3.1233814911266022, "kl": 0.00865936279296875, "learning_rate": 9.414023390051118e-07, "loss": -0.0004, "num_tokens": 53843940.0, "reward": 0.0, "reward_std": 0.819797158241272, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014740290475578583, "rewards/wordcountpos_reward/raw_geo/std": 0.0658470598782537, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1274.625, "completions/mean_terminated_length": 1222.615478515625, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.248249649929986, "frac_reward_zero_std": 0.0, "grad_norm": 3.075918954158901, "kl": 0.01251220703125, "learning_rate": 9.412471921266994e-07, "loss": -0.0582, "num_tokens": 53897470.0, "reward": 0.0, "reward_std": 0.9509785175323486, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23388790191161093, "rewards/wordcountpos_reward/raw_geo/std": 0.09003484326266731, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1200.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 897.1875, "completions/mean_terminated_length": 897.1875, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.2484496899379876, "frac_reward_zero_std": 0.0, "grad_norm": 3.680408307644297, "kl": 0.0113983154296875, "learning_rate": 9.410918544743793e-07, "loss": -0.0321, "num_tokens": 53943569.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9757611751556396, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02647252936003171, "rewards/wordcountpos_reward/raw_geo/std": 0.16936879227561216, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1167.0, "completions/mean_terminated_length": 1167.0, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.2486497299459892, "frac_reward_zero_std": 0.0, "grad_norm": 3.0143625446443933, "kl": 0.00836181640625, "learning_rate": 9.409363261238952e-07, "loss": -0.0057, "num_tokens": 53985929.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5423867106437683, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2100799131987011, "rewards/wordcountpos_reward/raw_geo/std": 0.19522053766035405, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1312.0625, "completions/mean_terminated_length": 1199.300048828125, "completions/min_length": 1071.0, "completions/min_terminated_length": 1071.0, "epoch": 0.2488497699539908, "frac_reward_zero_std": 0.0, "grad_norm": 2.865796921956238, "kl": 0.010650634765625, "learning_rate": 9.407806071510833e-07, "loss": 0.0036, "num_tokens": 54029746.0, "reward": 0.0, "reward_std": 0.9989716410636902, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10754467538891152, "rewards/wordcountpos_reward/raw_geo/std": 0.1220195948567986, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 962.0, "completions/max_terminated_length": 962.0, "completions/mean_length": 721.5625, "completions/mean_terminated_length": 721.5625, "completions/min_length": 507.0, "completions/min_terminated_length": 507.0, "epoch": 0.24904980996199239, "frac_reward_zero_std": 0.0, "grad_norm": 3.9716540563864653, "kl": 0.00891876220703125, "learning_rate": 9.406246976318727e-07, "loss": -0.003, "num_tokens": 54055875.0, "reward": 0.0, "reward_std": 0.7075194120407104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07191045737442983, "rewards/wordcountpos_reward/raw_geo/std": 0.0334827689059806, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1198.5, "completions/mean_terminated_length": 1178.4000244140625, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.249249849969994, "frac_reward_zero_std": 0.0, "grad_norm": 3.057722477564494, "kl": 0.010772705078125, "learning_rate": 9.40468597642286e-07, "loss": -0.0164, "num_tokens": 54096051.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7778348922729492, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006909602977690901, "rewards/wordcountpos_reward/raw_geo/std": 0.09271125732127858, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1313.4375, "completions/mean_terminated_length": 1286.7857666015625, "completions/min_length": 1107.0, "completions/min_terminated_length": 1107.0, "epoch": 0.2494498899779956, "frac_reward_zero_std": 0.0, "grad_norm": 3.0355939075031535, "kl": 0.00888824462890625, "learning_rate": 9.403123072584378e-07, "loss": -0.0222, "num_tokens": 54141034.0, "reward": 2.9802322387695312e-08, "reward_std": 0.46149325370788574, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.029357419913127118, "rewards/wordcountpos_reward/raw_geo/std": 0.19366208734464058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1152.4375, "completions/mean_terminated_length": 1152.4375, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.2496499299859972, "frac_reward_zero_std": 0.0, "grad_norm": 2.9580821520095806, "kl": 0.0133209228515625, "learning_rate": 9.401558265565363e-07, "loss": 0.0215, "num_tokens": 54190985.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6795248985290527, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05420108240828713, "rewards/wordcountpos_reward/raw_geo/std": 0.046221657466690776, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1236.3125, "completions/mean_terminated_length": 1116.45458984375, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.2498499699939988, "frac_reward_zero_std": 0.0, "grad_norm": 3.084881634824025, "kl": 0.01300048828125, "learning_rate": 9.399991556128821e-07, "loss": 0.0054, "num_tokens": 54234950.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0154902935028076, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09068974039494918, "rewards/wordcountpos_reward/raw_geo/std": 0.30661587622967174, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1181.625, "completions/mean_terminated_length": 1160.4000244140625, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.2500500100020004, "frac_reward_zero_std": 0.0, "grad_norm": 3.321451875280831, "kl": 0.01287841796875, "learning_rate": 9.398422945038687e-07, "loss": -0.0108, "num_tokens": 54274904.0, "reward": -7.450580596923828e-09, "reward_std": 0.9767227172851562, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.0804224274690149, "rewards/wordcountpos_reward/raw_geo/std": 0.12264978339153693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1192.9375, "completions/mean_terminated_length": 1172.4666748046875, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.250250050010002, "frac_reward_zero_std": 0.0, "grad_norm": 3.4138594976102894, "kl": 0.01123046875, "learning_rate": 9.396852433059822e-07, "loss": -0.0358, "num_tokens": 54321735.0, "reward": 0.0, "reward_std": 0.6348475217819214, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.041239905887221616, "rewards/wordcountpos_reward/raw_geo/std": 0.12711543450847512, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1109.125, "completions/mean_terminated_length": 1109.125, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.2504500900180036, "frac_reward_zero_std": 0.0, "grad_norm": 3.178998855405846, "kl": 0.010345458984375, "learning_rate": 9.395280020958017e-07, "loss": 0.0193, "num_tokens": 54362041.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6717987060546875, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0893681435224798, "rewards/wordcountpos_reward/raw_geo/std": 0.0686433669024916, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1131.75, "completions/mean_terminated_length": 1131.75, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.2506501300260052, "frac_reward_zero_std": 0.0, "grad_norm": 2.5990984201054514, "kl": 0.008197784423828125, "learning_rate": 9.393705709499983e-07, "loss": -0.0162, "num_tokens": 54397573.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5909625887870789, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12603111910443537, "rewards/wordcountpos_reward/raw_geo/std": 0.2202199493009877, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1216.6875, "completions/mean_terminated_length": 1197.800048828125, "completions/min_length": 723.0, "completions/min_terminated_length": 723.0, "epoch": 0.2508501700340068, "frac_reward_zero_std": 0.0, "grad_norm": 2.770105963651306, "kl": 0.009979248046875, "learning_rate": 9.392129499453365e-07, "loss": -0.0338, "num_tokens": 54442976.0, "reward": 0.0, "reward_std": 0.4439162611961365, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16088690330533229, "rewards/wordcountpos_reward/raw_geo/std": 0.2763708098292569, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1077.0, "completions/max_terminated_length": 1077.0, "completions/mean_length": 901.9375, "completions/mean_terminated_length": 901.9375, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.2510502100420084, "frac_reward_zero_std": 0.0, "grad_norm": 2.8910077582467326, "kl": 0.00782012939453125, "learning_rate": 9.390551391586729e-07, "loss": -0.0004, "num_tokens": 54476887.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0163949728012085, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10093721940259831, "rewards/wordcountpos_reward/raw_geo/std": 0.13762791550120512, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 1080.3125, "completions/mean_terminated_length": 1052.3333740234375, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.25125025005001, "frac_reward_zero_std": 0.0, "grad_norm": 2.133116863804827, "kl": 0.00440216064453125, "learning_rate": 9.388971386669569e-07, "loss": 0.0268, "num_tokens": 54509892.0, "reward": 7.450580596923828e-09, "reward_std": 0.9897512793540955, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.04225042507981598, "rewards/wordcountpos_reward/raw_geo/std": 0.11726899070892662, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14782371884055637, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1234.5625, "completions/mean_terminated_length": 1173.3077392578125, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.2514502900580116, "frac_reward_zero_std": 0.0, "grad_norm": 3.2561124637036296, "kl": 0.014434814453125, "learning_rate": 9.387389485472301e-07, "loss": 0.0355, "num_tokens": 54548445.0, "reward": -7.450580596923828e-09, "reward_std": 1.0002635717391968, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.013848969385092313, "rewards/wordcountpos_reward/raw_geo/std": 0.025816229223001773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1060.3125, "completions/mean_terminated_length": 1060.3125, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.25165033006601323, "frac_reward_zero_std": 0.0, "grad_norm": 3.0919865414413734, "kl": 0.01078033447265625, "learning_rate": 9.385805688766268e-07, "loss": 0.008, "num_tokens": 54586290.0, "reward": 2.9802322387695312e-08, "reward_std": 1.030531644821167, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11895778131688654, "rewards/wordcountpos_reward/raw_geo/std": 0.04901466419178711, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1199.375, "completions/mean_terminated_length": 1156.4285888671875, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.2518503700740148, "frac_reward_zero_std": 0.0, "grad_norm": 3.505503211167695, "kl": 0.0120086669921875, "learning_rate": 9.384219997323734e-07, "loss": 0.0006, "num_tokens": 54631328.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9332997798919678, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3848945762046329, "rewards/wordcountpos_reward/raw_geo/std": 0.1249950853313475, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.13326387079497304, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1212.625, "completions/mean_terminated_length": 1146.3077392578125, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.2520504100820164, "frac_reward_zero_std": 0.0, "grad_norm": 3.1757813903450502, "kl": 0.0117034912109375, "learning_rate": 9.382632411917896e-07, "loss": 0.0202, "num_tokens": 54674058.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9522824287414551, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04022095800934172, "rewards/wordcountpos_reward/raw_geo/std": 0.0655810379231539, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1148.0625, "completions/mean_terminated_length": 1124.60009765625, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.252250450090018, "frac_reward_zero_std": 0.0, "grad_norm": 3.6974507527693534, "kl": 0.0130462646484375, "learning_rate": 9.38104293332286e-07, "loss": 0.0099, "num_tokens": 54724667.0, "reward": -7.450580596923828e-09, "reward_std": 1.0409611463546753, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.05184588466799917, "rewards/wordcountpos_reward/raw_geo/std": 0.09853057643512145, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.6541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.17250872227009062, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1130.1875, "completions/mean_terminated_length": 1130.1875, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.2524504900980196, "frac_reward_zero_std": 0.0, "grad_norm": 3.3250305011590573, "kl": 0.0126495361328125, "learning_rate": 9.379451562313665e-07, "loss": -0.0045, "num_tokens": 54752950.0, "reward": 7.450580596923828e-09, "reward_std": 1.0683720111846924, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.010528151264465987, "rewards/wordcountpos_reward/raw_geo/std": 0.02327544213199588, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1173.3125, "completions/mean_terminated_length": 1126.6429443359375, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.25265053010602123, "frac_reward_zero_std": 0.0, "grad_norm": 3.348001591553057, "kl": 0.0131072998046875, "learning_rate": 9.377858299666274e-07, "loss": -0.0698, "num_tokens": 54790131.0, "reward": 4.470348358154297e-08, "reward_std": 1.0145583152770996, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17056655754872932, "rewards/wordcountpos_reward/raw_geo/std": 0.1295930519159792, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505566, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1189.9375, "completions/mean_terminated_length": 1118.3846435546875, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.2528505701140228, "frac_reward_zero_std": 0.0, "grad_norm": 3.1579396158729773, "kl": 0.011810302734375, "learning_rate": 9.376263146157567e-07, "loss": -0.0406, "num_tokens": 54840154.0, "reward": 0.0, "reward_std": 0.7796655893325806, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01128076966471883, "rewards/wordcountpos_reward/raw_geo/std": 0.09652555944155027, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1124.75, "completions/mean_terminated_length": 1099.7333984375, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.2530506101220244, "frac_reward_zero_std": 0.0, "grad_norm": 2.8229567483755087, "kl": 0.00846099853515625, "learning_rate": 9.374666102565349e-07, "loss": 0.0113, "num_tokens": 54885150.0, "reward": -7.450580596923828e-09, "reward_std": 1.0440900325775146, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16922787602805842, "rewards/wordcountpos_reward/raw_geo/std": 0.0929617691840423, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1119.125, "completions/mean_terminated_length": 1119.125, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.253250650130026, "frac_reward_zero_std": 0.0, "grad_norm": 2.6367575137259913, "kl": 0.00952911376953125, "learning_rate": 9.373067169668342e-07, "loss": -0.0402, "num_tokens": 54939768.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7578144073486328, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06613304395781526, "rewards/wordcountpos_reward/raw_geo/std": 0.09076340207322646, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1215.625, "completions/mean_terminated_length": 1196.666748046875, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.2534506901380276, "frac_reward_zero_std": 0.0, "grad_norm": 3.2372695477011266, "kl": 0.0131683349609375, "learning_rate": 9.3714663482462e-07, "loss": -0.0329, "num_tokens": 54985642.0, "reward": 0.0, "reward_std": 0.7184152603149414, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.025812510818699015, "rewards/wordcountpos_reward/raw_geo/std": 0.10152114274486329, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1129.0625, "completions/mean_terminated_length": 1129.0625, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.2536507301460292, "frac_reward_zero_std": 0.0, "grad_norm": 3.443997557440038, "kl": 0.0140838623046875, "learning_rate": 9.369863639079483e-07, "loss": -0.0305, "num_tokens": 55035747.0, "reward": 0.0, "reward_std": 0.6500293016433716, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053422663878111516, "rewards/wordcountpos_reward/raw_geo/std": 0.04111691762533374, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1300.625, "completions/mean_terminated_length": 1234.166748046875, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.2538507701540308, "frac_reward_zero_std": 0.0, "grad_norm": 2.6639089399628704, "kl": 0.00926971435546875, "learning_rate": 9.368259042949684e-07, "loss": 0.0396, "num_tokens": 55091045.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0621726512908936, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16927031563030392, "rewards/wordcountpos_reward/raw_geo/std": 0.1656906032063202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1065.3125, "completions/mean_terminated_length": 1003.21435546875, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.2540508101620324, "frac_reward_zero_std": 0.0, "grad_norm": 3.446181509655221, "kl": 0.0171356201171875, "learning_rate": 9.366652560639213e-07, "loss": 0.0588, "num_tokens": 55135194.0, "reward": 0.0, "reward_std": 1.0683015584945679, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006379943924650185, "rewards/wordcountpos_reward/raw_geo/std": 0.05344627696656618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 984.6875, "completions/mean_terminated_length": 984.6875, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.254250850170034, "frac_reward_zero_std": 0.0, "grad_norm": 3.1148038314636026, "kl": 0.0119781494140625, "learning_rate": 9.36504419293139e-07, "loss": 0.0002, "num_tokens": 55177877.0, "reward": 0.0, "reward_std": 0.3591625690460205, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13650479135143023, "rewards/wordcountpos_reward/raw_geo/std": 0.1886810896480471, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1144.0, "completions/max_terminated_length": 1144.0, "completions/mean_length": 973.6875, "completions/mean_terminated_length": 973.6875, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.2544508901780356, "frac_reward_zero_std": 0.0, "grad_norm": 1.8649460848496238, "kl": 0.004665374755859375, "learning_rate": 9.363433940610473e-07, "loss": -0.0021, "num_tokens": 55212016.0, "reward": 0.0, "reward_std": 0.6665674448013306, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10728997578930068, "rewards/wordcountpos_reward/raw_geo/std": 0.06827657733252843, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534189, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 997.9375, "completions/mean_terminated_length": 964.4667358398438, "completions/min_length": 716.0, "completions/min_terminated_length": 716.0, "epoch": 0.2546509301860372, "frac_reward_zero_std": 0.0, "grad_norm": 3.9371494230639654, "kl": 0.0152130126953125, "learning_rate": 9.36182180446162e-07, "loss": 0.0211, "num_tokens": 55256783.0, "reward": -3.725290298461914e-09, "reward_std": 0.9784665107727051, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16698309507261566, "rewards/wordcountpos_reward/raw_geo/std": 0.3973044352034272, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414602, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1315.625, "completions/mean_terminated_length": 1273.0770263671875, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.2548509701940388, "frac_reward_zero_std": 0.0, "grad_norm": 2.907954872908463, "kl": 0.00988006591796875, "learning_rate": 9.360207785270919e-07, "loss": 0.0119, "num_tokens": 55301665.0, "reward": -2.2351741790771484e-08, "reward_std": 0.947008490562439, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03704052276339866, "rewards/wordcountpos_reward/raw_geo/std": 0.0511904361538753, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1221.875, "completions/mean_terminated_length": 1203.3333740234375, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.2550510102020404, "frac_reward_zero_std": 0.0, "grad_norm": 3.262873969831839, "kl": 0.0124969482421875, "learning_rate": 9.358591883825374e-07, "loss": 0.0128, "num_tokens": 55353775.0, "reward": 0.0, "reward_std": 0.48852062225341797, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02199998374618549, "rewards/wordcountpos_reward/raw_geo/std": 0.10642002252772947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1032.0, "completions/mean_terminated_length": 1032.0, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.255251050210042, "frac_reward_zero_std": 0.0, "grad_norm": 2.5441785081223056, "kl": 0.0072021484375, "learning_rate": 9.356974100912905e-07, "loss": -0.0012, "num_tokens": 55386567.0, "reward": 0.0, "reward_std": 0.9048218727111816, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.18271576355998648, "rewards/wordcountpos_reward/raw_geo/std": 0.07811095232491601, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1170.0, "completions/max_terminated_length": 1170.0, "completions/mean_length": 1001.375, "completions/mean_terminated_length": 1001.375, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.25545109021804363, "frac_reward_zero_std": 0.0, "grad_norm": 2.892584458556108, "kl": 0.010223388671875, "learning_rate": 9.355354437322349e-07, "loss": -0.0218, "num_tokens": 55427677.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8625585436820984, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.028637234879569543, "rewards/wordcountpos_reward/raw_geo/std": 0.06760539234396637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 963.6875, "completions/mean_terminated_length": 927.9334106445312, "completions/min_length": 468.0, "completions/min_terminated_length": 468.0, "epoch": 0.2556511302260452, "frac_reward_zero_std": 0.0, "grad_norm": 3.7800643707523567, "kl": 0.00937652587890625, "learning_rate": 9.353732893843463e-07, "loss": -0.0182, "num_tokens": 55454688.0, "reward": 1.4901161193847656e-08, "reward_std": 0.903618335723877, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.028224982358281237, "rewards/wordcountpos_reward/raw_geo/std": 0.05138158383457142, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1105.625, "completions/mean_terminated_length": 1049.2857666015625, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.2558511702340468, "frac_reward_zero_std": 0.0, "grad_norm": 3.0079094875909136, "kl": 0.01226806640625, "learning_rate": 9.352109471266921e-07, "loss": -0.0367, "num_tokens": 55499418.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0564343929290771, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08008234643155482, "rewards/wordcountpos_reward/raw_geo/std": 0.06916243548698373, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05947299418254506, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1287.9375, "completions/mean_terminated_length": 1239.0, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.2560512102420484, "frac_reward_zero_std": 0.0, "grad_norm": 2.6124486637998205, "kl": 0.00928497314453125, "learning_rate": 9.350484170384305e-07, "loss": -0.0216, "num_tokens": 55552825.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9763938784599304, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06970593028905295, "rewards/wordcountpos_reward/raw_geo/std": 0.27792124404991814, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1264.75, "completions/mean_terminated_length": 1210.4615478515625, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.25625125025005, "frac_reward_zero_std": 0.0, "grad_norm": 2.9215080353326526, "kl": 0.012176513671875, "learning_rate": 9.348856991988124e-07, "loss": -0.0126, "num_tokens": 55607445.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8163424134254456, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06512166428289373, "rewards/wordcountpos_reward/raw_geo/std": 0.03965902068909061, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195009, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1189.9375, "completions/mean_terminated_length": 1086.5833740234375, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.25645129025805163, "frac_reward_zero_std": 0.0, "grad_norm": 2.738373363053566, "kl": 0.007617950439453125, "learning_rate": 9.347227936871798e-07, "loss": 0.0458, "num_tokens": 55649940.0, "reward": 0.0, "reward_std": 1.0240424871444702, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07930292713786982, "rewards/wordcountpos_reward/raw_geo/std": 0.08234009464922543, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1209.5625, "completions/mean_terminated_length": 1190.2000732421875, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.2566513302660532, "frac_reward_zero_std": 0.0, "grad_norm": 3.156896390176148, "kl": 0.0163421630859375, "learning_rate": 9.345597005829659e-07, "loss": -0.0253, "num_tokens": 55689333.0, "reward": 0.0, "reward_std": 0.5715999603271484, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02092505244427041, "rewards/wordcountpos_reward/raw_geo/std": 0.09103752137188358, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1074.9375, "completions/mean_terminated_length": 1074.9375, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.2568513702740548, "frac_reward_zero_std": 0.0, "grad_norm": 3.4638608716053567, "kl": 0.0129852294921875, "learning_rate": 9.343964199656958e-07, "loss": -0.0228, "num_tokens": 55725700.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0137016773223877, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.29295312213273805, "rewards/wordcountpos_reward/raw_geo/std": 0.17071984708252969, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1211.5625, "completions/mean_terminated_length": 1170.357177734375, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.2570514102820564, "frac_reward_zero_std": 0.0, "grad_norm": 3.4762024093779567, "kl": 0.0132598876953125, "learning_rate": 9.342329519149857e-07, "loss": -0.0003, "num_tokens": 55768733.0, "reward": 0.0, "reward_std": 0.9034953117370605, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0421967356798582, "rewards/wordcountpos_reward/raw_geo/std": 0.0849121131335797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1208.0, "completions/max_terminated_length": 1208.0, "completions/mean_length": 965.0625, "completions/mean_terminated_length": 965.0625, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.257251450290058, "frac_reward_zero_std": 0.0, "grad_norm": 4.1527745530547, "kl": 0.014801025390625, "learning_rate": 9.340692965105436e-07, "loss": -0.0444, "num_tokens": 55817182.0, "reward": 0.0, "reward_std": 1.0609381198883057, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06974323276844036, "rewards/wordcountpos_reward/raw_geo/std": 0.14156279044522646, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1320.0, "completions/mean_terminated_length": 1278.4615478515625, "completions/min_length": 1150.0, "completions/min_terminated_length": 1150.0, "epoch": 0.25745149029805964, "frac_reward_zero_std": 0.0, "grad_norm": 2.752750218479042, "kl": 0.0117340087890625, "learning_rate": 9.339054538321684e-07, "loss": -0.0154, "num_tokens": 55867950.0, "reward": -2.9802322387695312e-08, "reward_std": 1.05967378616333, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17496370841224154, "rewards/wordcountpos_reward/raw_geo/std": 0.1790953166289047, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1167.0, "completions/max_terminated_length": 1167.0, "completions/mean_length": 957.5, "completions/mean_terminated_length": 957.5, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.2576515303060612, "frac_reward_zero_std": 0.0, "grad_norm": 3.914814068971208, "kl": 0.0126953125, "learning_rate": 9.337414239597508e-07, "loss": -0.041, "num_tokens": 55903118.0, "reward": 0.0, "reward_std": 1.0278408527374268, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.043575248178272186, "rewards/wordcountpos_reward/raw_geo/std": 0.04494286531979787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1500617156989701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1079.625, "completions/mean_terminated_length": 1079.625, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.2578515703140628, "frac_reward_zero_std": 0.0, "grad_norm": 3.502875907660986, "kl": 0.0133056640625, "learning_rate": 9.335772069732721e-07, "loss": 0.0121, "num_tokens": 55949144.0, "reward": 5.960464477539063e-08, "reward_std": 0.5807216167449951, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.016232348059682405, "rewards/wordcountpos_reward/raw_geo/std": 0.057718988541358776, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.131021626713557, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1090.125, "completions/mean_terminated_length": 1090.125, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.2580516103220644, "frac_reward_zero_std": 0.0, "grad_norm": 2.9485236288450274, "kl": 0.0113372802734375, "learning_rate": 9.334128029528056e-07, "loss": -0.0206, "num_tokens": 55991850.0, "reward": 0.0, "reward_std": 0.52561354637146, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14546362731117893, "rewards/wordcountpos_reward/raw_geo/std": 0.13004232923839992, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1233.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 833.5, "completions/mean_terminated_length": 833.5, "completions/min_length": 548.0, "completions/min_terminated_length": 548.0, "epoch": 0.25825165033006603, "frac_reward_zero_std": 0.0, "grad_norm": 4.1589337518459955, "kl": 0.011627197265625, "learning_rate": 9.332482119785154e-07, "loss": -0.0537, "num_tokens": 56034122.0, "reward": 0.0, "reward_std": 1.0124348402023315, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04517783157465571, "rewards/wordcountpos_reward/raw_geo/std": 0.04922203207266278, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14168300559373406, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1100.5625, "completions/mean_terminated_length": 1100.5625, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.25845169033806764, "frac_reward_zero_std": 0.0, "grad_norm": 3.7382282331356254, "kl": 0.01348876953125, "learning_rate": 9.330834341306568e-07, "loss": 0.0025, "num_tokens": 56069435.0, "reward": 2.9802322387695312e-08, "reward_std": 0.786846935749054, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08983485375460394, "rewards/wordcountpos_reward/raw_geo/std": 0.06404521590731933, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1199.8125, "completions/mean_terminated_length": 1130.5384521484375, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.2586517303460692, "frac_reward_zero_std": 0.0, "grad_norm": 3.6756969750041906, "kl": 0.014495849609375, "learning_rate": 9.329184694895761e-07, "loss": -0.0248, "num_tokens": 56110576.0, "reward": 0.0, "reward_std": 0.8876994848251343, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14079246728090464, "rewards/wordcountpos_reward/raw_geo/std": 0.309775475969557, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1476.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1096.6875, "completions/mean_terminated_length": 1096.6875, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.2588517703540708, "frac_reward_zero_std": 0.0, "grad_norm": 3.5586776080094893, "kl": 0.0144805908203125, "learning_rate": 9.327533181357108e-07, "loss": -0.0613, "num_tokens": 56158651.0, "reward": 0.0, "reward_std": 0.7723835706710815, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04638565570125904, "rewards/wordcountpos_reward/raw_geo/std": 0.08166517632064407, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1342.125, "completions/mean_terminated_length": 1305.6923828125, "completions/min_length": 1111.0, "completions/min_terminated_length": 1111.0, "epoch": 0.2590518103620724, "frac_reward_zero_std": 0.0, "grad_norm": 3.314303317882215, "kl": 0.0146331787109375, "learning_rate": 9.325879801495896e-07, "loss": -0.0206, "num_tokens": 56206045.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0447652339935303, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.163557598438544, "rewards/wordcountpos_reward/raw_geo/std": 0.27087149321320614, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1050.0, "completions/mean_terminated_length": 1050.0, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.25925185037007403, "frac_reward_zero_std": 0.0, "grad_norm": 3.6004080229042112, "kl": 0.009552001953125, "learning_rate": 9.32422455611832e-07, "loss": 0.0002, "num_tokens": 56240501.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9028578996658325, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0861874301459659, "rewards/wordcountpos_reward/raw_geo/std": 0.17727228389438807, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1217.625, "completions/mean_terminated_length": 1217.625, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.2594518903780756, "frac_reward_zero_std": 0.0, "grad_norm": 3.1142774520483307, "kl": 0.0130462646484375, "learning_rate": 9.322567446031485e-07, "loss": 0.0169, "num_tokens": 56293007.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0351372957229614, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00587760601972199, "rewards/wordcountpos_reward/raw_geo/std": 0.13707811725817387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1151.0, "completions/max_terminated_length": 1151.0, "completions/mean_length": 959.0625, "completions/mean_terminated_length": 959.0625, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.2596519303860772, "frac_reward_zero_std": 0.0, "grad_norm": 2.8855079747390597, "kl": 0.00557708740234375, "learning_rate": 9.320908472043405e-07, "loss": -0.0256, "num_tokens": 56332312.0, "reward": 0.0, "reward_std": 0.9650878310203552, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2374036854434155, "rewards/wordcountpos_reward/raw_geo/std": 0.3076723839392811, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1460593486680443, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1247.75, "completions/mean_terminated_length": 1096.4000244140625, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.2598519703940788, "frac_reward_zero_std": 0.0, "grad_norm": 3.5448430410040217, "kl": 0.01348876953125, "learning_rate": 9.319247634963005e-07, "loss": 0.0239, "num_tokens": 56377452.0, "reward": 0.0, "reward_std": 0.9623478055000305, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.008811470563813087, "rewards/wordcountpos_reward/raw_geo/std": 0.07854741734794526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1367.125, "completions/mean_terminated_length": 1287.4000244140625, "completions/min_length": 1106.0, "completions/min_terminated_length": 1106.0, "epoch": 0.2600520104020804, "frac_reward_zero_std": 0.0, "grad_norm": 2.6420638267204843, "kl": 0.00991058349609375, "learning_rate": 9.317584935600112e-07, "loss": 0.0115, "num_tokens": 56431318.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5112171173095703, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05187853016644232, "rewards/wordcountpos_reward/raw_geo/std": 0.12294637238428911, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1033.0, "completions/mean_terminated_length": 1033.0, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.26025205041008204, "frac_reward_zero_std": 0.0, "grad_norm": 3.5760471199024932, "kl": 0.0125579833984375, "learning_rate": 9.315920374765473e-07, "loss": -0.0289, "num_tokens": 56473102.0, "reward": -2.2351741790771484e-08, "reward_std": 1.068831205368042, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.41433268403627094, "rewards/wordcountpos_reward/raw_geo/std": 0.12221847942477106, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1162.4375, "completions/mean_terminated_length": 1162.4375, "completions/min_length": 981.0, "completions/min_terminated_length": 981.0, "epoch": 0.2604520904180836, "frac_reward_zero_std": 0.0, "grad_norm": 2.8413355835479677, "kl": 0.0210113525390625, "learning_rate": 9.314253953270729e-07, "loss": 0.0161, "num_tokens": 56515709.0, "reward": 0.0, "reward_std": 1.0106377601623535, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007029772900578802, "rewards/wordcountpos_reward/raw_geo/std": 0.054686941639807043, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1194.6875, "completions/mean_terminated_length": 1194.6875, "completions/min_length": 1121.0, "completions/min_terminated_length": 1121.0, "epoch": 0.2606521304260852, "frac_reward_zero_std": 0.0, "grad_norm": 2.6049537937413323, "kl": 0.00963592529296875, "learning_rate": 9.312585671928438e-07, "loss": 0.001, "num_tokens": 56560456.0, "reward": -3.725290298461914e-09, "reward_std": 1.0658988952636719, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.09387440084174416, "rewards/wordcountpos_reward/raw_geo/std": 0.1573949780043434, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1051.1875, "completions/mean_terminated_length": 1021.2667236328125, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.2608521704340868, "frac_reward_zero_std": 0.0, "grad_norm": 3.2230227145957655, "kl": 0.0103607177734375, "learning_rate": 9.31091553155206e-07, "loss": -0.0021, "num_tokens": 56603891.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7782348990440369, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07878562149932157, "rewards/wordcountpos_reward/raw_geo/std": 0.08156958135051963, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 1500.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 1500.0, "completions/min_terminated_length": 0.0, "epoch": 0.26105221044208843, "frac_reward_zero_std": 0.0, "grad_norm": 2.1813394648575954, "kl": 0.0089111328125, "learning_rate": 9.309243532955965e-07, "loss": 0.0004, "num_tokens": 56662019.0, "reward": 0.0, "reward_std": 1.019121527671814, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0003862166239334292, "rewards/wordcountpos_reward/raw_geo/std": 0.0381657552702585, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1116.375, "completions/mean_terminated_length": 1116.375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.26125225045009004, "frac_reward_zero_std": 0.0, "grad_norm": 3.0748479598448295, "kl": 0.009796142578125, "learning_rate": 9.307569676955427e-07, "loss": -0.0145, "num_tokens": 56701257.0, "reward": 0.0, "reward_std": 0.4968336820602417, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08799337952196712, "rewards/wordcountpos_reward/raw_geo/std": 0.20776554098218755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1327.5, "completions/mean_terminated_length": 1302.857177734375, "completions/min_length": 1148.0, "completions/min_terminated_length": 1148.0, "epoch": 0.2614522904580916, "frac_reward_zero_std": 0.0, "grad_norm": 2.721536714300266, "kl": 0.0088348388671875, "learning_rate": 9.305893964366622e-07, "loss": -0.0256, "num_tokens": 56758745.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9516972899436951, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18678117192364793, "rewards/wordcountpos_reward/raw_geo/std": 0.22755365163896482, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1373.6875, "completions/mean_terminated_length": 1331.5833740234375, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.2616523304660932, "frac_reward_zero_std": 0.0, "grad_norm": 3.335858940834732, "kl": 0.0152587890625, "learning_rate": 9.30421639600664e-07, "loss": 0.003, "num_tokens": 56809836.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0039998292922974, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10080624442977545, "rewards/wordcountpos_reward/raw_geo/std": 0.10859932385832648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1124.3125, "completions/mean_terminated_length": 1124.3125, "completions/min_length": 947.0, "completions/min_terminated_length": 947.0, "epoch": 0.2618523704740948, "frac_reward_zero_std": 0.0, "grad_norm": 2.9559535032270343, "kl": 0.0161285400390625, "learning_rate": 9.302536972693468e-07, "loss": 0.0395, "num_tokens": 56844529.0, "reward": 0.0, "reward_std": 0.5205225944519043, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04030075384568577, "rewards/wordcountpos_reward/raw_geo/std": 0.05920299075581017, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1119.3125, "completions/mean_terminated_length": 1119.3125, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.26205241048209643, "frac_reward_zero_std": 0.0, "grad_norm": 2.241231224971887, "kl": 0.0090789794921875, "learning_rate": 9.300855695246001e-07, "loss": 0.0157, "num_tokens": 56884694.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6421548128128052, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11339379914287477, "rewards/wordcountpos_reward/raw_geo/std": 0.10363888855277821, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1392.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1182.5, "completions/mean_terminated_length": 1182.5, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.26225245049009804, "frac_reward_zero_std": 0.0, "grad_norm": 2.926127619303371, "kl": 0.0115509033203125, "learning_rate": 9.299172564484037e-07, "loss": -0.0109, "num_tokens": 56929126.0, "reward": -7.450580596923828e-09, "reward_std": 1.065841794013977, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.026402997660014693, "rewards/wordcountpos_reward/raw_geo/std": 0.0982617540146608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 1044.75, "completions/mean_terminated_length": 1014.4000244140625, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.2624524904980996, "frac_reward_zero_std": 0.0, "grad_norm": 3.665182666637484, "kl": 0.0137481689453125, "learning_rate": 9.297487581228278e-07, "loss": -0.0256, "num_tokens": 56961930.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6444878578186035, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0035201484503752907, "rewards/wordcountpos_reward/raw_geo/std": 0.03998971541012088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1003.5625, "completions/mean_terminated_length": 1003.5625, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.2626525305061012, "frac_reward_zero_std": 0.0, "grad_norm": 3.117479913061063, "kl": 0.00811767578125, "learning_rate": 9.295800746300333e-07, "loss": -0.0002, "num_tokens": 57008827.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0623592138290405, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21138889000868058, "rewards/wordcountpos_reward/raw_geo/std": 0.06605080761790647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279466, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1018.4375, "completions/mean_terminated_length": 1018.4375, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.2628525705141028, "frac_reward_zero_std": 0.0, "grad_norm": 3.111308755693173, "kl": 0.014068603515625, "learning_rate": 9.294112060522707e-07, "loss": -0.0057, "num_tokens": 57058722.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8369705677032471, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07324355186536505, "rewards/wordcountpos_reward/raw_geo/std": 0.1534260980394482, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1500617156989701, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1118.6875, "completions/mean_terminated_length": 1118.6875, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.26305261052210444, "frac_reward_zero_std": 0.0, "grad_norm": 3.165806866067674, "kl": 0.0119171142578125, "learning_rate": 9.29242152471881e-07, "loss": 0.0238, "num_tokens": 57110381.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7537950277328491, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12126831470287965, "rewards/wordcountpos_reward/raw_geo/std": 0.06961113409480561, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.15491933384829668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 1119.625, "completions/mean_terminated_length": 1119.625, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.26325265053010605, "frac_reward_zero_std": 0.0, "grad_norm": 3.3213003949512054, "kl": 0.01226806640625, "learning_rate": 9.290729139712959e-07, "loss": -0.0109, "num_tokens": 57152983.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9175257682800293, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06356923035458562, "rewards/wordcountpos_reward/raw_geo/std": 0.11717775277599078, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1083.6875, "completions/mean_terminated_length": 1083.6875, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.2634526905381076, "frac_reward_zero_std": 0.0, "grad_norm": 3.493175334128261, "kl": 0.012451171875, "learning_rate": 9.289034906330364e-07, "loss": -0.0132, "num_tokens": 57202906.0, "reward": 0.0, "reward_std": 0.8033919334411621, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19616846080657924, "rewards/wordcountpos_reward/raw_geo/std": 0.12880272523051836, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1192569587999888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1143.9375, "completions/mean_terminated_length": 1120.2000732421875, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.2636527305461092, "frac_reward_zero_std": 0.0, "grad_norm": 3.368674241011562, "kl": 0.0130615234375, "learning_rate": 9.287338825397144e-07, "loss": -0.025, "num_tokens": 57246649.0, "reward": 0.0, "reward_std": 0.8065879940986633, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0912165502212347, "rewards/wordcountpos_reward/raw_geo/std": 0.13101515209777695, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1110.3125, "completions/mean_terminated_length": 1020.3846435546875, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.2638527705541108, "frac_reward_zero_std": 0.0, "grad_norm": 2.8502154225176315, "kl": 0.0113372802734375, "learning_rate": 9.285640897740315e-07, "loss": -0.0408, "num_tokens": 57294470.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8818243741989136, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07301610557198836, "rewards/wordcountpos_reward/raw_geo/std": 0.08821878668366548, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1306.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 1185.6875, "completions/mean_terminated_length": 1185.6875, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.26405281056211244, "frac_reward_zero_std": 0.0, "grad_norm": 2.830628450412457, "kl": 0.00850677490234375, "learning_rate": 9.283941124187794e-07, "loss": 0.0051, "num_tokens": 57332625.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0440890789031982, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03537423104814936, "rewards/wordcountpos_reward/raw_geo/std": 0.07573538213556574, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1432.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1121.875, "completions/mean_terminated_length": 1121.875, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.264252850570114, "frac_reward_zero_std": 0.0, "grad_norm": 3.1565792310277754, "kl": 0.01190185546875, "learning_rate": 9.282239505568398e-07, "loss": -0.0457, "num_tokens": 57377071.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9014105796813965, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015475371323433004, "rewards/wordcountpos_reward/raw_geo/std": 0.10963715732439772, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1059.875, "completions/mean_terminated_length": 1059.875, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.2644528905781156, "frac_reward_zero_std": 0.0, "grad_norm": 3.4623503045748625, "kl": 0.012725830078125, "learning_rate": 9.280536042711843e-07, "loss": 0.0019, "num_tokens": 57426893.0, "reward": 0.0, "reward_std": 0.7773397564888, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06660652874005955, "rewards/wordcountpos_reward/raw_geo/std": 0.06535892696742167, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1308.0625, "completions/mean_terminated_length": 1280.6429443359375, "completions/min_length": 1074.0, "completions/min_terminated_length": 1074.0, "epoch": 0.2646529305861172, "frac_reward_zero_std": 0.0, "grad_norm": 2.5581026411988477, "kl": 0.01007080078125, "learning_rate": 9.278830736448749e-07, "loss": 0.003, "num_tokens": 57465710.0, "reward": 0.0, "reward_std": 1.0626261234283447, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13668668904954506, "rewards/wordcountpos_reward/raw_geo/std": 0.10590962318200114, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1010.125, "completions/mean_terminated_length": 1010.125, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.26485297059411883, "frac_reward_zero_std": 0.0, "grad_norm": 3.178599080251012, "kl": 0.0114288330078125, "learning_rate": 9.277123587610627e-07, "loss": -0.017, "num_tokens": 57504896.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0451385974884033, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01110605229219865, "rewards/wordcountpos_reward/raw_geo/std": 0.16628909135656478, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 986.9375, "completions/mean_terminated_length": 986.9375, "completions/min_length": 661.0, "completions/min_terminated_length": 661.0, "epoch": 0.26505301060212044, "frac_reward_zero_std": 0.0, "grad_norm": 3.2899546761259764, "kl": 0.0110931396484375, "learning_rate": 9.275414597029892e-07, "loss": -0.0421, "num_tokens": 57543143.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0194789171218872, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08199274817558691, "rewards/wordcountpos_reward/raw_geo/std": 0.11202208502788084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1230.3125, "completions/mean_terminated_length": 1212.3333740234375, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.265253050610122, "frac_reward_zero_std": 0.0, "grad_norm": 3.1999373137212315, "kl": 0.013214111328125, "learning_rate": 9.273703765539856e-07, "loss": 0.008, "num_tokens": 57598172.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0607068538665771, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06759752511962717, "rewards/wordcountpos_reward/raw_geo/std": 0.32600691585950425, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1335.875, "completions/mean_terminated_length": 1298.0, "completions/min_length": 1056.0, "completions/min_terminated_length": 1056.0, "epoch": 0.2654530906181236, "frac_reward_zero_std": 0.0, "grad_norm": 2.7561038392801196, "kl": 0.010406494140625, "learning_rate": 9.271991093974729e-07, "loss": 0.0455, "num_tokens": 57643634.0, "reward": 0.0, "reward_std": 0.9067040085792542, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011244473880272929, "rewards/wordcountpos_reward/raw_geo/std": 0.14834116845580742, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1231.3125, "completions/mean_terminated_length": 1192.9285888671875, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.2656531306261252, "frac_reward_zero_std": 0.0, "grad_norm": 3.2127250472615114, "kl": 0.0118560791015625, "learning_rate": 9.270276583169615e-07, "loss": -0.0101, "num_tokens": 57679415.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7477010488510132, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02571372064069735, "rewards/wordcountpos_reward/raw_geo/std": 0.06179244424519644, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1275.625, "completions/mean_terminated_length": 1260.666748046875, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.26585317063412683, "frac_reward_zero_std": 0.0, "grad_norm": 2.9060665760288633, "kl": 0.0151519775390625, "learning_rate": 9.26856023396052e-07, "loss": 0.0219, "num_tokens": 57724153.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7904667854309082, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.006325948389958885, "rewards/wordcountpos_reward/raw_geo/std": 0.07352738937643481, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 971.75, "completions/mean_terminated_length": 936.5333862304688, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.26605321064212845, "frac_reward_zero_std": 0.0, "grad_norm": 4.293784926721248, "kl": 0.0136260986328125, "learning_rate": 9.266842047184341e-07, "loss": -0.0151, "num_tokens": 57774301.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0023233890533447, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08929856891248433, "rewards/wordcountpos_reward/raw_geo/std": 0.10912798576740892, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1133.1875, "completions/mean_terminated_length": 1108.7333984375, "completions/min_length": 726.0, "completions/min_terminated_length": 726.0, "epoch": 0.26625325065013, "frac_reward_zero_std": 0.0, "grad_norm": 2.6117504817377273, "kl": 0.0096435546875, "learning_rate": 9.265122023678876e-07, "loss": -0.0095, "num_tokens": 57825328.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9647880792617798, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0431066818198256, "rewards/wordcountpos_reward/raw_geo/std": 0.038825996973331764, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 1076.625, "completions/mean_terminated_length": 1048.4000244140625, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.2664532906581316, "frac_reward_zero_std": 0.0, "grad_norm": 3.3233035719936965, "kl": 0.0125732421875, "learning_rate": 9.263400164282813e-07, "loss": -0.0711, "num_tokens": 57866082.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9972634315490723, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10954330783243124, "rewards/wordcountpos_reward/raw_geo/std": 0.12660006848122876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1063.75, "completions/mean_terminated_length": 918.3333740234375, "completions/min_length": 663.0, "completions/min_terminated_length": 663.0, "epoch": 0.2666533306661332, "frac_reward_zero_std": 0.0, "grad_norm": 2.8658710772958367, "kl": 0.012237548828125, "learning_rate": 9.261676469835742e-07, "loss": -0.0622, "num_tokens": 57909158.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7784160375595093, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09446571757206894, "rewards/wordcountpos_reward/raw_geo/std": 0.0724714191814173, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043481, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1040.8125, "completions/mean_terminated_length": 1010.2000732421875, "completions/min_length": 622.0, "completions/min_terminated_length": 622.0, "epoch": 0.26685337067413484, "frac_reward_zero_std": 0.0, "grad_norm": 3.849908116579512, "kl": 0.01544189453125, "learning_rate": 9.259950941178143e-07, "loss": -0.0529, "num_tokens": 57944483.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8825619220733643, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.038455335685986705, "rewards/wordcountpos_reward/raw_geo/std": 0.07967617689491613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1191.75, "completions/mean_terminated_length": 1191.75, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.26705341068213645, "frac_reward_zero_std": 0.0, "grad_norm": 3.294961518687421, "kl": 0.0128021240234375, "learning_rate": 9.258223579151391e-07, "loss": -0.0127, "num_tokens": 57978503.0, "reward": 7.450580596923828e-09, "reward_std": 1.0289199352264404, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1318588002451407, "rewards/wordcountpos_reward/raw_geo/std": 0.040993331363048655, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0807373427759331, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1096.4375, "completions/mean_terminated_length": 1096.4375, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.267253450690138, "frac_reward_zero_std": 0.0, "grad_norm": 3.482373763042207, "kl": 0.011566162109375, "learning_rate": 9.256494384597757e-07, "loss": 0.0141, "num_tokens": 58018846.0, "reward": 0.0, "reward_std": 0.8990585207939148, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04837066820683459, "rewards/wordcountpos_reward/raw_geo/std": 0.16541771169728306, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1043.4375, "completions/mean_terminated_length": 1043.4375, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.2674534906981396, "frac_reward_zero_std": 0.0, "grad_norm": 3.613469038751485, "kl": 0.015960693359375, "learning_rate": 9.254763358360404e-07, "loss": 0.0034, "num_tokens": 58058677.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8007277846336365, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.4691028912080138, "rewards/wordcountpos_reward/raw_geo/std": 0.3709570160688002, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1013.625, "completions/mean_terminated_length": 1013.625, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.26765353070614123, "frac_reward_zero_std": 0.0, "grad_norm": 3.2969465098105486, "kl": 0.011199951171875, "learning_rate": 9.253030501283385e-07, "loss": -0.0032, "num_tokens": 58109831.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8624023199081421, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21244774534812277, "rewards/wordcountpos_reward/raw_geo/std": 0.0825516805567163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.1343709624716425, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1257.25, "completions/mean_terminated_length": 1111.5999755859375, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.26785357071414284, "frac_reward_zero_std": 0.0, "grad_norm": 2.140477136099414, "kl": 0.00653076171875, "learning_rate": 9.251295814211653e-07, "loss": 0.0038, "num_tokens": 58149803.0, "reward": 7.450580596923828e-09, "reward_std": 1.0512056350708008, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.026270034125531232, "rewards/wordcountpos_reward/raw_geo/std": 0.06230033372380366, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 990.6875, "completions/mean_terminated_length": 990.6875, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.26805361072214445, "frac_reward_zero_std": 0.0, "grad_norm": 3.852051986465934, "kl": 0.0138397216796875, "learning_rate": 9.249559297991048e-07, "loss": -0.0026, "num_tokens": 58180854.0, "reward": -1.4901161193847656e-08, "reward_std": 1.024838924407959, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013040899338463155, "rewards/wordcountpos_reward/raw_geo/std": 0.09594836052791329, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1434.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1068.1875, "completions/mean_terminated_length": 1068.1875, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.268253650730146, "frac_reward_zero_std": 0.0, "grad_norm": 3.44857219439527, "kl": 0.0167694091796875, "learning_rate": 9.247820953468303e-07, "loss": -0.0598, "num_tokens": 58229393.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9985100030899048, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07919737214670426, "rewards/wordcountpos_reward/raw_geo/std": 0.06912740452471008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1349.25, "completions/mean_terminated_length": 1314.4615478515625, "completions/min_length": 1006.0, "completions/min_terminated_length": 1006.0, "epoch": 0.2684536907381476, "frac_reward_zero_std": 0.0, "grad_norm": 2.9074942292928037, "kl": 0.012451171875, "learning_rate": 9.24608078149104e-07, "loss": 0.0001, "num_tokens": 58276677.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8224983215332031, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12249196806413662, "rewards/wordcountpos_reward/raw_geo/std": 0.0753645342761518, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1074.625, "completions/mean_terminated_length": 1074.625, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.26865373074614923, "frac_reward_zero_std": 0.0, "grad_norm": 3.230205102457441, "kl": 0.0110931396484375, "learning_rate": 9.244338782907779e-07, "loss": -0.0204, "num_tokens": 58314759.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8409535884857178, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010195062058330174, "rewards/wordcountpos_reward/raw_geo/std": 0.04283344997887795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.19907192074632132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1227.6875, "completions/mean_terminated_length": 1188.7857666015625, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.26885377075415084, "frac_reward_zero_std": 0.0, "grad_norm": 3.164203935473939, "kl": 0.01324462890625, "learning_rate": 9.242594958567927e-07, "loss": -0.0148, "num_tokens": 58351210.0, "reward": 0.0, "reward_std": 0.9589077234268188, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08234442081184556, "rewards/wordcountpos_reward/raw_geo/std": 0.0987841680522422, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1258.0, "completions/mean_terminated_length": 1241.86669921875, "completions/min_length": 1106.0, "completions/min_terminated_length": 1106.0, "epoch": 0.26905381076215246, "frac_reward_zero_std": 0.0, "grad_norm": 3.176546533261156, "kl": 0.01373291015625, "learning_rate": 9.240849309321775e-07, "loss": -0.0073, "num_tokens": 58398050.0, "reward": 1.862645149230957e-08, "reward_std": 1.0659713745117188, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026625377001508035, "rewards/wordcountpos_reward/raw_geo/std": 0.2688349212892977, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1313.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1068.0, "completions/mean_terminated_length": 1068.0, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.269253850770154, "frac_reward_zero_std": 0.0, "grad_norm": 3.2654399088518344, "kl": 0.0128021240234375, "learning_rate": 9.239101836020514e-07, "loss": -0.0048, "num_tokens": 58447954.0, "reward": 0.0, "reward_std": 0.7926424741744995, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05405716232633811, "rewards/wordcountpos_reward/raw_geo/std": 0.0538442902263567, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1334.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 968.0, "completions/mean_terminated_length": 968.0, "completions/min_length": 709.0, "completions/min_terminated_length": 709.0, "epoch": 0.2694538907781556, "frac_reward_zero_std": 0.0, "grad_norm": 2.5701510035149515, "kl": 0.00904083251953125, "learning_rate": 9.237352539516218e-07, "loss": -0.003, "num_tokens": 58477826.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9795045852661133, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01667172677709021, "rewards/wordcountpos_reward/raw_geo/std": 0.03454947781911094, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 877.375, "completions/mean_terminated_length": 877.375, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.26965393078615724, "frac_reward_zero_std": 0.0, "grad_norm": 2.9214818861509846, "kl": 0.00888824462890625, "learning_rate": 9.235601420661854e-07, "loss": -0.0142, "num_tokens": 58521200.0, "reward": -2.9802322387695312e-08, "reward_std": 0.910345196723938, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19097399036646404, "rewards/wordcountpos_reward/raw_geo/std": 0.18371007194953604, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1108.125, "completions/mean_terminated_length": 1017.6923217773438, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.26985397079415885, "frac_reward_zero_std": 0.0, "grad_norm": 3.702804609565373, "kl": 0.0123748779296875, "learning_rate": 9.233848480311276e-07, "loss": -0.0103, "num_tokens": 58564370.0, "reward": 0.0, "reward_std": 0.9629231691360474, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.018004762443022855, "rewards/wordcountpos_reward/raw_geo/std": 0.04140659806051792, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1134476547592341, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1251.125, "completions/mean_terminated_length": 1234.533447265625, "completions/min_length": 1014.0, "completions/min_terminated_length": 1014.0, "epoch": 0.2700540108021604, "frac_reward_zero_std": 0.0, "grad_norm": 3.314697641112342, "kl": 0.011932373046875, "learning_rate": 9.232093719319222e-07, "loss": -0.0472, "num_tokens": 58612924.0, "reward": 2.9802322387695312e-08, "reward_std": 0.45265740156173706, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20688401678725907, "rewards/wordcountpos_reward/raw_geo/std": 0.19138629295852885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 1014.1875, "completions/mean_terminated_length": 1014.1875, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.270254050810162, "frac_reward_zero_std": 0.0, "grad_norm": 2.6738455445679485, "kl": 0.0095367431640625, "learning_rate": 9.230337138541324e-07, "loss": -0.018, "num_tokens": 58650943.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7811430096626282, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004392687295942687, "rewards/wordcountpos_reward/raw_geo/std": 0.056791360021955645, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408158, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1187.0, "completions/mean_terminated_length": 1166.1334228515625, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.27045409081816363, "frac_reward_zero_std": 0.0, "grad_norm": 3.1413484026259946, "kl": 0.0172119140625, "learning_rate": 9.228578738834097e-07, "loss": -0.0257, "num_tokens": 58694615.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9431371092796326, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05245220304259816, "rewards/wordcountpos_reward/raw_geo/std": 0.10026222694921318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1142.625, "completions/mean_terminated_length": 1118.800048828125, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.27065413082616524, "frac_reward_zero_std": 0.0, "grad_norm": 6.181918848272692, "kl": 0.0277862548828125, "learning_rate": 9.226818521054946e-07, "loss": 0.0056, "num_tokens": 58736977.0, "reward": 0.0, "reward_std": 0.7209534645080566, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14046826286678493, "rewards/wordcountpos_reward/raw_geo/std": 0.08713244162950681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1149.0, "completions/mean_terminated_length": 1149.0, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.27085417083416685, "frac_reward_zero_std": 0.0, "grad_norm": 2.6225405473235086, "kl": 0.0102996826171875, "learning_rate": 9.225056486062162e-07, "loss": 0.0074, "num_tokens": 58783361.0, "reward": 0.0, "reward_std": 1.0312507152557373, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11598048332012811, "rewards/wordcountpos_reward/raw_geo/std": 0.08426221901713499, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466156, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1346.9375, "completions/mean_terminated_length": 1311.615478515625, "completions/min_length": 1080.0, "completions/min_terminated_length": 1080.0, "epoch": 0.2710542108421684, "frac_reward_zero_std": 0.0, "grad_norm": 3.054766502370073, "kl": 0.01318359375, "learning_rate": 9.22329263471492e-07, "loss": -0.0419, "num_tokens": 58824928.0, "reward": 1.4901161193847656e-08, "reward_std": 1.065586805343628, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1565419472804918, "rewards/wordcountpos_reward/raw_geo/std": 0.2769985640408725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1314.6875, "completions/mean_terminated_length": 1271.923095703125, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.27125425085017, "frac_reward_zero_std": 0.0, "grad_norm": 2.372160875298143, "kl": 0.00640869140625, "learning_rate": 9.221526967873282e-07, "loss": 0.0365, "num_tokens": 58874595.0, "reward": 0.0, "reward_std": 0.7954102754592896, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07302610188194188, "rewards/wordcountpos_reward/raw_geo/std": 0.0504454627781234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1149.0, "completions/max_terminated_length": 1149.0, "completions/mean_length": 835.875, "completions/mean_terminated_length": 835.875, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.27145429085817163, "frac_reward_zero_std": 0.0, "grad_norm": 4.535797584154669, "kl": 0.01132965087890625, "learning_rate": 9.219759486398195e-07, "loss": 0.0615, "num_tokens": 58915121.0, "reward": 2.9802322387695312e-08, "reward_std": 1.022127628326416, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03609640559805066, "rewards/wordcountpos_reward/raw_geo/std": 0.038934199834147, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1519624710005487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1125.0, "completions/max_terminated_length": 1125.0, "completions/mean_length": 839.9375, "completions/mean_terminated_length": 839.9375, "completions/min_length": 576.0, "completions/min_terminated_length": 576.0, "epoch": 0.27165433086617324, "frac_reward_zero_std": 0.0, "grad_norm": 3.494026449834135, "kl": 0.00786590576171875, "learning_rate": 9.217990191151491e-07, "loss": 0.0187, "num_tokens": 58951608.0, "reward": 1.862645149230957e-08, "reward_std": 1.065683364868164, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004431650772553532, "rewards/wordcountpos_reward/raw_geo/std": 0.08076126551883148, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1069.25, "completions/mean_terminated_length": 1040.533447265625, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.27185437087417486, "frac_reward_zero_std": 0.0, "grad_norm": 3.341375727278998, "kl": 0.012908935546875, "learning_rate": 9.216219082995888e-07, "loss": 0.0143, "num_tokens": 59001508.0, "reward": 0.0, "reward_std": 0.6653932332992554, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07971418875054004, "rewards/wordcountpos_reward/raw_geo/std": 0.13084018230139893, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1164.3125, "completions/mean_terminated_length": 1164.3125, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.2720544108821764, "frac_reward_zero_std": 0.0, "grad_norm": 3.213983634875895, "kl": 0.0158538818359375, "learning_rate": 9.214446162794985e-07, "loss": 0.0115, "num_tokens": 59052089.0, "reward": 0.0, "reward_std": 0.6901043653488159, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07215062176201831, "rewards/wordcountpos_reward/raw_geo/std": 0.1702939987173843, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1146.625, "completions/mean_terminated_length": 1146.625, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.272254450890178, "frac_reward_zero_std": 0.0, "grad_norm": 3.0531281776730466, "kl": 0.0136871337890625, "learning_rate": 9.212671431413266e-07, "loss": 0.0085, "num_tokens": 59101363.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0148411989212036, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05979066423449196, "rewards/wordcountpos_reward/raw_geo/std": 0.05588237713060525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1104.6875, "completions/mean_terminated_length": 1104.6875, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.27245449089817964, "frac_reward_zero_std": 0.0, "grad_norm": 2.7486261436638686, "kl": 0.01239013671875, "learning_rate": 9.210894889716096e-07, "loss": -0.0325, "num_tokens": 59139486.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9145042896270752, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04023661018076847, "rewards/wordcountpos_reward/raw_geo/std": 0.1367240657793835, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1159.0, "completions/max_terminated_length": 1159.0, "completions/mean_length": 939.375, "completions/mean_terminated_length": 939.375, "completions/min_length": 502.0, "completions/min_terminated_length": 502.0, "epoch": 0.27265453090618125, "frac_reward_zero_std": 0.0, "grad_norm": 3.6228772180189983, "kl": 0.01214599609375, "learning_rate": 9.20911653856973e-07, "loss": -0.0875, "num_tokens": 59175116.0, "reward": 0.0, "reward_std": 0.7505817413330078, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053260712633788766, "rewards/wordcountpos_reward/raw_geo/std": 0.07830346831739698, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090448, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1131.625, "completions/mean_terminated_length": 1107.0667724609375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.27285457091418286, "frac_reward_zero_std": 0.0, "grad_norm": 3.4109351390702862, "kl": 0.011871337890625, "learning_rate": 9.207336378841296e-07, "loss": 0.0134, "num_tokens": 59209534.0, "reward": 0.0, "reward_std": 0.8190287351608276, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04012960712301714, "rewards/wordcountpos_reward/raw_geo/std": 0.07664959236179351, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1273.0, "completions/max_terminated_length": 1273.0, "completions/mean_length": 960.9375, "completions/mean_terminated_length": 960.9375, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.2730546109221844, "frac_reward_zero_std": 0.0, "grad_norm": 3.6805429393713434, "kl": 0.016021728515625, "learning_rate": 9.205554411398809e-07, "loss": -0.0275, "num_tokens": 59245741.0, "reward": 0.0, "reward_std": 1.008124589920044, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.005485846110903772, "rewards/wordcountpos_reward/raw_geo/std": 0.23073269196236268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1250.1875, "completions/mean_terminated_length": 1233.533447265625, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.273254650930186, "frac_reward_zero_std": 0.0, "grad_norm": 2.924440519961206, "kl": 0.011688232421875, "learning_rate": 9.203770637111164e-07, "loss": -0.0247, "num_tokens": 59297224.0, "reward": 0.0, "reward_std": 0.7118884325027466, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0474396312917142, "rewards/wordcountpos_reward/raw_geo/std": 0.10290443680017783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1259.9375, "completions/mean_terminated_length": 1243.933349609375, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.27345469093818764, "frac_reward_zero_std": 0.0, "grad_norm": 3.0182803361247963, "kl": 0.0117950439453125, "learning_rate": 9.201985056848137e-07, "loss": 0.0079, "num_tokens": 59346439.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7288313508033752, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0688705113855613, "rewards/wordcountpos_reward/raw_geo/std": 0.14611475086579112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1176.0625, "completions/mean_terminated_length": 1028.8182373046875, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.27365473094618925, "frac_reward_zero_std": 0.0, "grad_norm": 3.778286334307396, "kl": 0.0194244384765625, "learning_rate": 9.200197671480388e-07, "loss": -0.0006, "num_tokens": 59401520.0, "reward": 7.450580596923828e-09, "reward_std": 1.0400464534759521, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.00717439011179862, "rewards/wordcountpos_reward/raw_geo/std": 0.06822649921302706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1339.4375, "completions/mean_terminated_length": 1178.875, "completions/min_length": 1107.0, "completions/min_terminated_length": 1107.0, "epoch": 0.27385477095419086, "frac_reward_zero_std": 0.0, "grad_norm": 2.7184337092932904, "kl": 0.00885772705078125, "learning_rate": 9.198408481879451e-07, "loss": -0.005, "num_tokens": 59447263.0, "reward": 0.0, "reward_std": 0.4408210515975952, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02206530877448587, "rewards/wordcountpos_reward/raw_geo/std": 0.34701958478135586, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 975.3125, "completions/mean_terminated_length": 975.3125, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.2740548109621924, "frac_reward_zero_std": 0.0, "grad_norm": 3.68807508959835, "kl": 0.01873779296875, "learning_rate": 9.196617488917744e-07, "loss": -0.0839, "num_tokens": 59498204.0, "reward": 0.0, "reward_std": 0.98270583152771, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15365591842312276, "rewards/wordcountpos_reward/raw_geo/std": 0.22938719329650462, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1146.9375, "completions/mean_terminated_length": 1146.9375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.27425485097019403, "frac_reward_zero_std": 0.0, "grad_norm": 2.763464346946391, "kl": 0.00998687744140625, "learning_rate": 9.194824693468565e-07, "loss": 0.0185, "num_tokens": 59543899.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9326350688934326, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012038603836024654, "rewards/wordcountpos_reward/raw_geo/std": 0.10087102114186265, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1220.0, "completions/mean_terminated_length": 1201.3333740234375, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.27445489097819564, "frac_reward_zero_std": 0.0, "grad_norm": 3.5615131516961642, "kl": 0.013153076171875, "learning_rate": 9.193030096406086e-07, "loss": -0.0379, "num_tokens": 59592379.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0213422775268555, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.018487710210411637, "rewards/wordcountpos_reward/raw_geo/std": 0.09214097374801154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901858, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1289.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 891.3125, "completions/mean_terminated_length": 891.3125, "completions/min_length": 602.0, "completions/min_terminated_length": 602.0, "epoch": 0.27465493098619725, "frac_reward_zero_std": 0.0, "grad_norm": 3.7929046672160664, "kl": 0.0175323486328125, "learning_rate": 9.191233698605362e-07, "loss": -0.0781, "num_tokens": 59642856.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8414506316184998, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14643251135397625, "rewards/wordcountpos_reward/raw_geo/std": 0.07938352603492103, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12988598989256067, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1132.0625, "completions/mean_terminated_length": 1132.0625, "completions/min_length": 974.0, "completions/min_terminated_length": 974.0, "epoch": 0.27485497099419887, "frac_reward_zero_std": 0.0, "grad_norm": 2.8491561938692787, "kl": 0.00765228271484375, "learning_rate": 9.189435500942325e-07, "loss": 0.0062, "num_tokens": 59682665.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9678256511688232, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022124722463125385, "rewards/wordcountpos_reward/raw_geo/std": 0.22223568260669183, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567834, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1411.3125, "completions/mean_terminated_length": 1342.3333740234375, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.2750550110022004, "frac_reward_zero_std": 0.0, "grad_norm": 3.1115698700810617, "kl": 0.0123748779296875, "learning_rate": 9.187635504293786e-07, "loss": -0.0057, "num_tokens": 59738502.0, "reward": 0.0, "reward_std": 0.7496911883354187, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.055865920176616526, "rewards/wordcountpos_reward/raw_geo/std": 0.059380351825215186, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1044.0, "completions/mean_terminated_length": 1044.0, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.27525505101020203, "frac_reward_zero_std": 0.0, "grad_norm": 3.4574081519606032, "kl": 0.0126953125, "learning_rate": 9.185833709537428e-07, "loss": -0.0167, "num_tokens": 59773478.0, "reward": 0.0, "reward_std": 0.9807004928588867, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.035253871587400905, "rewards/wordcountpos_reward/raw_geo/std": 0.11689507061072131, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1128.875, "completions/mean_terminated_length": 1104.1334228515625, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.27545509101820365, "frac_reward_zero_std": 0.0, "grad_norm": 3.730138739156282, "kl": 0.013519287109375, "learning_rate": 9.184030117551817e-07, "loss": -0.0004, "num_tokens": 59815428.0, "reward": 0.0, "reward_std": 0.6007387638092041, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12532743233571975, "rewards/wordcountpos_reward/raw_geo/std": 0.13515303656722236, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14395215254459456, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 1131.9375, "completions/mean_terminated_length": 1131.9375, "completions/min_length": 947.0, "completions/min_terminated_length": 947.0, "epoch": 0.27565513102620526, "frac_reward_zero_std": 0.0, "grad_norm": 3.2719960529629306, "kl": 0.012908935546875, "learning_rate": 9.182224729216392e-07, "loss": -0.0071, "num_tokens": 59858811.0, "reward": 0.0, "reward_std": 0.7951167821884155, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2106708806558299, "rewards/wordcountpos_reward/raw_geo/std": 0.11682420488350234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1106.6875, "completions/mean_terminated_length": 1106.6875, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.2758551710342068, "frac_reward_zero_std": 0.0, "grad_norm": 2.753226886309937, "kl": 0.00955963134765625, "learning_rate": 9.18041754541147e-07, "loss": -0.0088, "num_tokens": 59902230.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0246782302856445, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1506037484712467, "rewards/wordcountpos_reward/raw_geo/std": 0.10874218240308127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1143.25, "completions/mean_terminated_length": 1143.25, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.2760552110422084, "frac_reward_zero_std": 0.0, "grad_norm": 2.8745428739439634, "kl": 0.0088043212890625, "learning_rate": 9.178608567018239e-07, "loss": -0.0137, "num_tokens": 59957482.0, "reward": 0.0, "reward_std": 0.556842565536499, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06917283100637084, "rewards/wordcountpos_reward/raw_geo/std": 0.11221379997586392, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1219.125, "completions/mean_terminated_length": 1200.4000244140625, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.27625525105021004, "frac_reward_zero_std": 0.0, "grad_norm": 2.2436239484436147, "kl": 0.0069580078125, "learning_rate": 9.176797794918766e-07, "loss": -0.0166, "num_tokens": 60002164.0, "reward": 0.0, "reward_std": 0.36811119318008423, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.017040913194512456, "rewards/wordcountpos_reward/raw_geo/std": 0.06916997067753554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1237.125, "completions/mean_terminated_length": 1219.60009765625, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.27645529105821165, "frac_reward_zero_std": 0.0, "grad_norm": 3.001512025782972, "kl": 0.00937652587890625, "learning_rate": 9.174985229995993e-07, "loss": 0.0295, "num_tokens": 60046182.0, "reward": 5.960464477539063e-08, "reward_std": 0.46628624200820923, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06045775510129246, "rewards/wordcountpos_reward/raw_geo/std": 0.09927655831750105, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717428, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1155.3125, "completions/mean_terminated_length": 1132.3333740234375, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.27665533106621326, "frac_reward_zero_std": 0.0, "grad_norm": 3.0843710147433128, "kl": 0.011383056640625, "learning_rate": 9.173170873133733e-07, "loss": -0.0021, "num_tokens": 60077075.0, "reward": 7.450580596923828e-09, "reward_std": 1.0667418241500854, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.04325215230585805, "rewards/wordcountpos_reward/raw_geo/std": 0.10393324990665573, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 982.3125, "completions/mean_terminated_length": 982.3125, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.2768553710742148, "frac_reward_zero_std": 0.0, "grad_norm": 3.9166711158287266, "kl": 0.0164794921875, "learning_rate": 9.171354725216677e-07, "loss": -0.0151, "num_tokens": 60113672.0, "reward": 0.0, "reward_std": 0.8619920015335083, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09801672317854913, "rewards/wordcountpos_reward/raw_geo/std": 0.09151933487160709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1168.8125, "completions/mean_terminated_length": 1168.8125, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.27705541108221643, "frac_reward_zero_std": 0.0, "grad_norm": 2.7847432214754884, "kl": 0.01123046875, "learning_rate": 9.169536787130384e-07, "loss": -0.0279, "num_tokens": 60147797.0, "reward": -4.470348358154297e-08, "reward_std": 1.0301002264022827, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2890869895835476, "rewards/wordcountpos_reward/raw_geo/std": 0.13441483814687813, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05947299418254506, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1353.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 1056.0, "completions/mean_terminated_length": 1056.0, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.27725545109021804, "frac_reward_zero_std": 0.0, "grad_norm": 3.3176775283118762, "kl": 0.016632080078125, "learning_rate": 9.167717059761291e-07, "loss": -0.0636, "num_tokens": 60189253.0, "reward": 0.0, "reward_std": 0.7266931533813477, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16031608474647557, "rewards/wordcountpos_reward/raw_geo/std": 0.19554262779120318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1120.6875, "completions/mean_terminated_length": 1120.6875, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.27745549109821965, "frac_reward_zero_std": 0.0, "grad_norm": 2.920281906862375, "kl": 0.00777435302734375, "learning_rate": 9.165895543996706e-07, "loss": 0.0105, "num_tokens": 60221416.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0156707763671875, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054389071187768555, "rewards/wordcountpos_reward/raw_geo/std": 0.11223096068870163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 981.4375, "completions/mean_terminated_length": 946.86669921875, "completions/min_length": 726.0, "completions/min_terminated_length": 726.0, "epoch": 0.27765553110622126, "frac_reward_zero_std": 0.0, "grad_norm": 3.416121583200704, "kl": 0.010345458984375, "learning_rate": 9.164072240724808e-07, "loss": 0.0229, "num_tokens": 60253767.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0603771209716797, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008125927638311006, "rewards/wordcountpos_reward/raw_geo/std": 0.07373990583865948, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1139.375, "completions/mean_terminated_length": 1139.375, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.2778555711142228, "frac_reward_zero_std": 0.0, "grad_norm": 2.8533955068373826, "kl": 0.00876617431640625, "learning_rate": 9.162247150834647e-07, "loss": 0.0, "num_tokens": 60305109.0, "reward": 0.0, "reward_std": 0.6584036350250244, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.060784437580004666, "rewards/wordcountpos_reward/raw_geo/std": 0.08400836683962326, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1245.4375, "completions/mean_terminated_length": 1209.071533203125, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.27805561112222443, "frac_reward_zero_std": 0.0, "grad_norm": 3.1302470566562377, "kl": 0.0109405517578125, "learning_rate": 9.160420275216143e-07, "loss": 0.0321, "num_tokens": 60341028.0, "reward": -1.862645149230957e-08, "reward_std": 1.0534486770629883, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027637515891455113, "rewards/wordcountpos_reward/raw_geo/std": 0.0659749422925075, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1372.375, "completions/mean_terminated_length": 1329.8333740234375, "completions/min_length": 1104.0, "completions/min_terminated_length": 1104.0, "epoch": 0.27825565113022604, "frac_reward_zero_std": 0.0, "grad_norm": 2.6027572542391426, "kl": 0.00890350341796875, "learning_rate": 9.158591614760093e-07, "loss": -0.0086, "num_tokens": 60388514.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9040713310241699, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17927903611793783, "rewards/wordcountpos_reward/raw_geo/std": 0.1986670529308137, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1023.1875, "completions/mean_terminated_length": 1023.1875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.27845569113822766, "frac_reward_zero_std": 0.0, "grad_norm": 3.6358193470144036, "kl": 0.0148468017578125, "learning_rate": 9.156761170358157e-07, "loss": -0.0116, "num_tokens": 60420573.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0098563432693481, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.001829963679511255, "rewards/wordcountpos_reward/raw_geo/std": 0.0781881984245527, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1122.3125, "completions/mean_terminated_length": 1097.1334228515625, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.27865573114622927, "frac_reward_zero_std": 0.0, "grad_norm": 2.4969738299342152, "kl": 0.00811004638671875, "learning_rate": 9.154928942902871e-07, "loss": 0.0218, "num_tokens": 60477450.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8635658025741577, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16187138590245986, "rewards/wordcountpos_reward/raw_geo/std": 0.17641266496894842, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1458055529095489, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 1114.75, "completions/mean_terminated_length": 1114.75, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.2788557711542308, "frac_reward_zero_std": 0.0, "grad_norm": 2.6937617133765004, "kl": 0.005733489990234375, "learning_rate": 9.153094933287635e-07, "loss": -0.0058, "num_tokens": 60514174.0, "reward": 0.0, "reward_std": 0.7024930119514465, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025331060471821844, "rewards/wordcountpos_reward/raw_geo/std": 0.0690205858861114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1322.0625, "completions/mean_terminated_length": 1310.2000732421875, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.27905581116223244, "frac_reward_zero_std": 0.0, "grad_norm": 2.4181775509293018, "kl": 0.009765625, "learning_rate": 9.15125914240672e-07, "loss": -0.0291, "num_tokens": 60564591.0, "reward": 4.470348358154297e-08, "reward_std": 0.9179534912109375, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03233699560197412, "rewards/wordcountpos_reward/raw_geo/std": 0.08916381968352377, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.12292725943057184, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1072.375, "completions/mean_terminated_length": 1072.375, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.27925585117023405, "frac_reward_zero_std": 0.0, "grad_norm": 3.471388624322902, "kl": 0.010955810546875, "learning_rate": 9.149421571155269e-07, "loss": -0.0255, "num_tokens": 60610949.0, "reward": 0.0, "reward_std": 0.8790923357009888, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0758267658493836, "rewards/wordcountpos_reward/raw_geo/std": 0.24199525282344028, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1370.0, "completions/mean_terminated_length": 1310.9091796875, "completions/min_length": 1169.0, "completions/min_terminated_length": 1169.0, "epoch": 0.27945589117823566, "frac_reward_zero_std": 0.0, "grad_norm": 2.8391010472620946, "kl": 0.0278472900390625, "learning_rate": 9.14758222042929e-07, "loss": -0.0236, "num_tokens": 60668997.0, "reward": 0.0, "reward_std": 1.0483356714248657, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.012110993128130854, "rewards/wordcountpos_reward/raw_geo/std": 0.11317337540671665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1246.375, "completions/mean_terminated_length": 1229.4666748046875, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.27965593118623727, "frac_reward_zero_std": 0.0, "grad_norm": 2.859472757284942, "kl": 0.013946533203125, "learning_rate": 9.145741091125653e-07, "loss": 0.0013, "num_tokens": 60717571.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9298990964889526, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13493727839137018, "rewards/wordcountpos_reward/raw_geo/std": 0.1848709559123489, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1105.0, "completions/mean_length": 1224.8125, "completions/mean_terminated_length": 949.625, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.27985597119423883, "frac_reward_zero_std": 0.0, "grad_norm": 2.758751563743274, "kl": 0.0103912353515625, "learning_rate": 9.143898184142107e-07, "loss": 0.0057, "num_tokens": 60761488.0, "reward": 0.0, "reward_std": 0.9024456739425659, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05697900253738435, "rewards/wordcountpos_reward/raw_geo/std": 0.16501723273535449, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1089.25, "completions/mean_terminated_length": 1089.25, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.28005601120224044, "frac_reward_zero_std": 0.0, "grad_norm": 3.340532550121953, "kl": 0.01507568359375, "learning_rate": 9.142053500377258e-07, "loss": 0.0431, "num_tokens": 60805076.0, "reward": -2.2351741790771484e-08, "reward_std": 1.052138328552246, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03481408316901882, "rewards/wordcountpos_reward/raw_geo/std": 0.1715353872668126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1135.875, "completions/mean_terminated_length": 1135.875, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.28025605121024205, "frac_reward_zero_std": 0.0, "grad_norm": 3.5706284087083304, "kl": 0.018280029296875, "learning_rate": 9.140207040730587e-07, "loss": -0.0007, "num_tokens": 60856314.0, "reward": 2.9802322387695312e-08, "reward_std": 1.026914358139038, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08055660424274584, "rewards/wordcountpos_reward/raw_geo/std": 0.13011315507970053, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1225.375, "completions/mean_terminated_length": 1207.0667724609375, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.28045609121824366, "frac_reward_zero_std": 0.0, "grad_norm": 2.8888657307053447, "kl": 0.00897979736328125, "learning_rate": 9.138358806102432e-07, "loss": -0.0221, "num_tokens": 60896328.0, "reward": -1.4901161193847656e-08, "reward_std": 1.039982795715332, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.047106577752333754, "rewards/wordcountpos_reward/raw_geo/std": 0.07876730222779964, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466156, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1147.8125, "completions/mean_terminated_length": 1147.8125, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.2806561312262453, "frac_reward_zero_std": 0.0, "grad_norm": 3.28062145839446, "kl": 0.011688232421875, "learning_rate": 9.136508797393999e-07, "loss": -0.0279, "num_tokens": 60942405.0, "reward": 0.0, "reward_std": 0.3488418459892273, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.021329339379944856, "rewards/wordcountpos_reward/raw_geo/std": 0.15904756894326477, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 1070.3125, "completions/mean_terminated_length": 1070.3125, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.28085617123424683, "frac_reward_zero_std": 0.0, "grad_norm": 3.5226019399508246, "kl": 0.0145721435546875, "learning_rate": 9.134657015507367e-07, "loss": 0.0012, "num_tokens": 60993634.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9390586614608765, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.041611643642463406, "rewards/wordcountpos_reward/raw_geo/std": 0.10000222250740834, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1213.0625, "completions/mean_terminated_length": 1213.0625, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.28105621124224844, "frac_reward_zero_std": 0.0, "grad_norm": 3.394117089295289, "kl": 0.0121002197265625, "learning_rate": 9.132803461345468e-07, "loss": -0.0273, "num_tokens": 61033331.0, "reward": 1.4901161193847656e-08, "reward_std": 1.044857144355774, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16409514266286096, "rewards/wordcountpos_reward/raw_geo/std": 0.09849294878670774, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1247.1875, "completions/mean_terminated_length": 1247.1875, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.28125625125025006, "frac_reward_zero_std": 0.0, "grad_norm": 3.1509064024289497, "kl": 0.011505126953125, "learning_rate": 9.130948135812105e-07, "loss": -0.0369, "num_tokens": 61080814.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0137759447097778, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10340731870290201, "rewards/wordcountpos_reward/raw_geo/std": 0.22499057309146034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1103.8125, "completions/mean_terminated_length": 1103.8125, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.28145629125825167, "frac_reward_zero_std": 0.0, "grad_norm": 3.198765305430847, "kl": 0.011749267578125, "learning_rate": 9.129091039811944e-07, "loss": -0.046, "num_tokens": 61118059.0, "reward": -2.9802322387695312e-08, "reward_std": 0.961811363697052, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.28597241753409464, "rewards/wordcountpos_reward/raw_geo/std": 0.11224233033028823, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1039.0, "completions/max_terminated_length": 1039.0, "completions/mean_length": 902.375, "completions/mean_terminated_length": 902.375, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.2816563312662532, "frac_reward_zero_std": 0.0, "grad_norm": 2.407459661893286, "kl": 0.00640869140625, "learning_rate": 9.127232174250511e-07, "loss": -0.0175, "num_tokens": 61152185.0, "reward": 7.450580596923828e-09, "reward_std": 1.009409785270691, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07016394094188246, "rewards/wordcountpos_reward/raw_geo/std": 0.06222900023738038, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1080.0, "completions/mean_terminated_length": 1080.0, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.28185637127425484, "frac_reward_zero_std": 0.0, "grad_norm": 3.5100415793474813, "kl": 0.012908935546875, "learning_rate": 9.125371540034202e-07, "loss": 0.0034, "num_tokens": 61188313.0, "reward": 0.0, "reward_std": 0.7352811694145203, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12839352769421766, "rewards/wordcountpos_reward/raw_geo/std": 0.11672063972840939, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1060.8125, "completions/mean_terminated_length": 1060.8125, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.28205641128225645, "frac_reward_zero_std": 0.0, "grad_norm": 2.953119358459673, "kl": 0.0134735107421875, "learning_rate": 9.123509138070265e-07, "loss": -0.0695, "num_tokens": 61221454.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0047800540924072, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07318731766562347, "rewards/wordcountpos_reward/raw_geo/std": 0.08146496729947218, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1300.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1115.9375, "completions/mean_terminated_length": 1115.9375, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.28225645129025806, "frac_reward_zero_std": 0.0, "grad_norm": 3.093505310480765, "kl": 0.01263427734375, "learning_rate": 9.121644969266819e-07, "loss": -0.0149, "num_tokens": 61263653.0, "reward": 0.0, "reward_std": 0.7194733619689941, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.19166954442962117, "rewards/wordcountpos_reward/raw_geo/std": 0.23097975328680015, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1186.1875, "completions/mean_terminated_length": 1165.2667236328125, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.28245649129825967, "frac_reward_zero_std": 0.0, "grad_norm": 3.276809477300148, "kl": 0.0137786865234375, "learning_rate": 9.11977903453284e-07, "loss": -0.0738, "num_tokens": 61318640.0, "reward": 7.450580596923828e-09, "reward_std": 1.048647403717041, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07428405352559575, "rewards/wordcountpos_reward/raw_geo/std": 0.1190818858808534, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262934, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1251.5, "completions/mean_terminated_length": 1234.933349609375, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.2826565313062612, "frac_reward_zero_std": 0.0, "grad_norm": 2.458683276654638, "kl": 0.007568359375, "learning_rate": 9.117911334778167e-07, "loss": 0.0384, "num_tokens": 61372528.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7630959749221802, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06604938458179417, "rewards/wordcountpos_reward/raw_geo/std": 0.20768778526773846, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1173.5, "completions/mean_terminated_length": 1173.5, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.28285657131426284, "frac_reward_zero_std": 0.0, "grad_norm": 3.6553934176518825, "kl": 0.013824462890625, "learning_rate": 9.116041870913498e-07, "loss": -0.0088, "num_tokens": 61412368.0, "reward": 0.0, "reward_std": 1.0296192169189453, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15830651054739794, "rewards/wordcountpos_reward/raw_geo/std": 0.3277690323617169, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1164.4375, "completions/mean_terminated_length": 1164.4375, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.28305661132226445, "frac_reward_zero_std": 0.0, "grad_norm": 3.0205958375516304, "kl": 0.0125885009765625, "learning_rate": 9.114170643850393e-07, "loss": -0.0114, "num_tokens": 61462791.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8752886056900024, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06802981831711734, "rewards/wordcountpos_reward/raw_geo/std": 0.052331723360420176, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1219.4375, "completions/mean_terminated_length": 1219.4375, "completions/min_length": 1030.0, "completions/min_terminated_length": 1030.0, "epoch": 0.28325665133026606, "frac_reward_zero_std": 0.0, "grad_norm": 3.34903630805863, "kl": 0.01397705078125, "learning_rate": 9.11229765450127e-07, "loss": 0.0141, "num_tokens": 61505686.0, "reward": 0.0, "reward_std": 0.7645179033279419, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.26829285807831027, "rewards/wordcountpos_reward/raw_geo/std": 0.102550054786711, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1144.25, "completions/mean_terminated_length": 1144.25, "completions/min_length": 997.0, "completions/min_terminated_length": 997.0, "epoch": 0.2834566913382677, "frac_reward_zero_std": 0.0, "grad_norm": 3.108151669114976, "kl": 0.011505126953125, "learning_rate": 9.110422903779408e-07, "loss": -0.0565, "num_tokens": 61547962.0, "reward": 0.0, "reward_std": 0.8635854721069336, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10239790319860115, "rewards/wordcountpos_reward/raw_geo/std": 0.1684096718898323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1061.625, "completions/mean_terminated_length": 1061.625, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.28365673134626923, "frac_reward_zero_std": 0.0, "grad_norm": 2.9902954024825954, "kl": 0.01169586181640625, "learning_rate": 9.108546392598945e-07, "loss": 0.0166, "num_tokens": 61585140.0, "reward": 0.0, "reward_std": 0.9319888353347778, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0005817982719527747, "rewards/wordcountpos_reward/raw_geo/std": 0.0785972315239089, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1473.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1111.5625, "completions/mean_terminated_length": 1111.5625, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.28385677135427084, "frac_reward_zero_std": 0.0, "grad_norm": 3.0608009052326564, "kl": 0.008331298828125, "learning_rate": 9.106668121874873e-07, "loss": 0.0125, "num_tokens": 61621029.0, "reward": 0.0, "reward_std": 0.766880452632904, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12466408091652723, "rewards/wordcountpos_reward/raw_geo/std": 0.11572453519138466, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1361.6875, "completions/mean_terminated_length": 1183.857177734375, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.28405681136227245, "frac_reward_zero_std": 0.0, "grad_norm": 2.6030415566219904, "kl": 0.0117340087890625, "learning_rate": 9.104788092523048e-07, "loss": -0.0387, "num_tokens": 61677536.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9692643284797668, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03903487772093722, "rewards/wordcountpos_reward/raw_geo/std": 0.1753418148802447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15910630036178586, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1225.0625, "completions/mean_terminated_length": 1133.416748046875, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.28425685137027407, "frac_reward_zero_std": 0.0, "grad_norm": 2.6940880465172916, "kl": 0.0140380859375, "learning_rate": 9.10290630546018e-07, "loss": -0.1308, "num_tokens": 61721713.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9340627789497375, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06334090977059484, "rewards/wordcountpos_reward/raw_geo/std": 0.09202209244918899, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1746424919657298, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 948.4375, "completions/mean_terminated_length": 948.4375, "completions/min_length": 656.0, "completions/min_terminated_length": 656.0, "epoch": 0.2844568913782757, "frac_reward_zero_std": 0.0, "grad_norm": 4.0159451943825655, "kl": 0.015167236328125, "learning_rate": 9.101022761603838e-07, "loss": 0.0092, "num_tokens": 61752800.0, "reward": -4.470348358154297e-08, "reward_std": 0.9512420892715454, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0166592115907759, "rewards/wordcountpos_reward/raw_geo/std": 0.02306741491050495, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923412, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1196.875, "completions/mean_terminated_length": 1176.666748046875, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.28465693138627723, "frac_reward_zero_std": 0.0, "grad_norm": 2.732968073261137, "kl": 0.0109100341796875, "learning_rate": 9.099137461872442e-07, "loss": 0.0079, "num_tokens": 61796486.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9555996656417847, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09531989835351454, "rewards/wordcountpos_reward/raw_geo/std": 0.14151302477176167, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1339.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1027.625, "completions/mean_terminated_length": 1027.625, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.28485697139427885, "frac_reward_zero_std": 0.0, "grad_norm": 3.876026713182956, "kl": 0.015533447265625, "learning_rate": 9.09725040718528e-07, "loss": -0.0312, "num_tokens": 61840184.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9122369289398193, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06729442328936362, "rewards/wordcountpos_reward/raw_geo/std": 0.12909710838971472, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1268.875, "completions/mean_terminated_length": 1130.2000732421875, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.28505701140228046, "frac_reward_zero_std": 0.0, "grad_norm": 3.198453997245997, "kl": 0.010589599609375, "learning_rate": 9.095361598462483e-07, "loss": -0.021, "num_tokens": 61882254.0, "reward": 0.0, "reward_std": 0.9591430425643921, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.026527549385203953, "rewards/wordcountpos_reward/raw_geo/std": 0.11430009243375268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1404.5, "completions/mean_terminated_length": 1281.71435546875, "completions/min_length": 1075.0, "completions/min_terminated_length": 1075.0, "epoch": 0.28525705141028207, "frac_reward_zero_std": 0.0, "grad_norm": 3.2792028494388274, "kl": 0.0125732421875, "learning_rate": 9.093471036625046e-07, "loss": -0.0101, "num_tokens": 61935734.0, "reward": -7.450580596923828e-09, "reward_std": 1.0573853254318237, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.09118229364441431, "rewards/wordcountpos_reward/raw_geo/std": 0.09009431404291929, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1172.0, "completions/mean_length": 1055.0625, "completions/mean_terminated_length": 1025.4000244140625, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.2854570914182837, "frac_reward_zero_std": 0.0, "grad_norm": 3.5455844711763955, "kl": 0.011474609375, "learning_rate": 9.091578722594811e-07, "loss": -0.0157, "num_tokens": 61966927.0, "reward": 1.862645149230957e-08, "reward_std": 1.0687472820281982, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04691214056755259, "rewards/wordcountpos_reward/raw_geo/std": 0.0661403137723392, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1006.75, "completions/mean_terminated_length": 1006.75, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.28565713142628524, "frac_reward_zero_std": 0.0, "grad_norm": 2.5435659637659245, "kl": 0.0093231201171875, "learning_rate": 9.089684657294485e-07, "loss": -0.049, "num_tokens": 62003747.0, "reward": 0.0, "reward_std": 1.027086615562439, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.22486759902127695, "rewards/wordcountpos_reward/raw_geo/std": 0.25593635309590834, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1092.375, "completions/mean_terminated_length": 1092.375, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.28585717143428685, "frac_reward_zero_std": 0.0, "grad_norm": 3.4834189959099735, "kl": 0.0149993896484375, "learning_rate": 9.087788841647619e-07, "loss": -0.0134, "num_tokens": 62043457.0, "reward": 0.0, "reward_std": 0.9337885975837708, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11889745881981413, "rewards/wordcountpos_reward/raw_geo/std": 0.16351219621503862, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1234.0, "completions/max_terminated_length": 1234.0, "completions/mean_length": 1016.5, "completions/mean_terminated_length": 1016.5, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.28605721144228846, "frac_reward_zero_std": 0.0, "grad_norm": 3.406045296365549, "kl": 0.0119781494140625, "learning_rate": 9.085891276578621e-07, "loss": 0.0054, "num_tokens": 62075681.0, "reward": 0.0, "reward_std": 0.6838955879211426, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0019982397269586527, "rewards/wordcountpos_reward/raw_geo/std": 0.060939103777697526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1091.0625, "completions/mean_terminated_length": 1063.800048828125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.2862572514502901, "frac_reward_zero_std": 0.0, "grad_norm": 2.882823054837441, "kl": 0.0104522705078125, "learning_rate": 9.083991963012753e-07, "loss": -0.0109, "num_tokens": 62126626.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9261794686317444, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.023166729783343547, "rewards/wordcountpos_reward/raw_geo/std": 0.1326399105086657, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0958393717904348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1473.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1119.75, "completions/mean_terminated_length": 1119.75, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.2864572914582917, "frac_reward_zero_std": 0.0, "grad_norm": 3.0442541290074376, "kl": 0.0121612548828125, "learning_rate": 9.082090901876131e-07, "loss": 0.0108, "num_tokens": 62168030.0, "reward": 0.0, "reward_std": 0.5862631797790527, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1623374306106013, "rewards/wordcountpos_reward/raw_geo/std": 0.20840766676787967, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1294.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 1142.5, "completions/mean_terminated_length": 1142.5, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.28665733146629324, "frac_reward_zero_std": 0.0, "grad_norm": 3.2686045048162846, "kl": 0.0131683349609375, "learning_rate": 9.080188094095717e-07, "loss": 0.0164, "num_tokens": 62206358.0, "reward": 0.0, "reward_std": 0.9273918271064758, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.036940030843641815, "rewards/wordcountpos_reward/raw_geo/std": 0.1378179969519579, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1347.3125, "completions/mean_terminated_length": 1312.0770263671875, "completions/min_length": 1151.0, "completions/min_terminated_length": 1151.0, "epoch": 0.28685737147429485, "frac_reward_zero_std": 0.0, "grad_norm": 3.1060355334989995, "kl": 0.01318359375, "learning_rate": 9.078283540599333e-07, "loss": -0.0352, "num_tokens": 62249539.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9784984588623047, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0701166993773698, "rewards/wordcountpos_reward/raw_geo/std": 0.17176638318784243, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1148.125, "completions/mean_terminated_length": 1124.666748046875, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.28705741148229647, "frac_reward_zero_std": 0.0, "grad_norm": 3.2827851230503495, "kl": 0.0108184814453125, "learning_rate": 9.076377242315645e-07, "loss": 0.023, "num_tokens": 62303189.0, "reward": -7.450580596923828e-09, "reward_std": 1.0397303104400635, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.013142796108544632, "rewards/wordcountpos_reward/raw_geo/std": 0.08275107863964964, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1141.625, "completions/mean_terminated_length": 1022.1666870117188, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.2872574514902981, "frac_reward_zero_std": 0.0, "grad_norm": 2.9022260815901473, "kl": 0.01263427734375, "learning_rate": 9.074469200174174e-07, "loss": 0.001, "num_tokens": 62355927.0, "reward": 0.0, "reward_std": 0.6853197813034058, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16060916623382235, "rewards/wordcountpos_reward/raw_geo/std": 0.05504937960144757, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 975.4375, "completions/mean_terminated_length": 975.4375, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.28745749149829963, "frac_reward_zero_std": 0.5, "grad_norm": 2.6366982491001685, "kl": 0.01513671875, "learning_rate": 9.07255941510529e-07, "loss": -0.0053, "num_tokens": 62386054.0, "reward": 2.2351741790771484e-08, "reward_std": 0.7467156052589417, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0047600792209254155, "rewards/wordcountpos_reward/raw_geo/std": 0.009419677579760049, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1060.75, "completions/mean_terminated_length": 1060.75, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.28765753150630125, "frac_reward_zero_std": 0.0, "grad_norm": 3.982152914527174, "kl": 0.015228271484375, "learning_rate": 9.070647888040213e-07, "loss": -0.0038, "num_tokens": 62433210.0, "reward": -7.450580596923828e-09, "reward_std": 1.051116704940796, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1402622738382543, "rewards/wordcountpos_reward/raw_geo/std": 0.05415710287503152, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15371932093796678, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1184.6875, "completions/mean_terminated_length": 1184.6875, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.28785757151430286, "frac_reward_zero_std": 0.0, "grad_norm": 3.1666054086887634, "kl": 0.0117340087890625, "learning_rate": 9.068734619911009e-07, "loss": 0.0059, "num_tokens": 62480445.0, "reward": 0.0, "reward_std": 0.9157466292381287, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.050639453352962945, "rewards/wordcountpos_reward/raw_geo/std": 0.04500294762279917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948504, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1136.125, "completions/mean_terminated_length": 1084.1429443359375, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.28805761152230447, "frac_reward_zero_std": 0.0, "grad_norm": 3.6127257019318777, "kl": 0.014617919921875, "learning_rate": 9.066819611650603e-07, "loss": -0.0178, "num_tokens": 62526151.0, "reward": 0.0, "reward_std": 0.9471538066864014, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04549529780207248, "rewards/wordcountpos_reward/raw_geo/std": 0.11488908788444303, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1201.875, "completions/mean_terminated_length": 1201.875, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.2882576515303061, "frac_reward_zero_std": 0.0, "grad_norm": 2.36479215666573, "kl": 0.00827789306640625, "learning_rate": 9.064902864192755e-07, "loss": 0.0178, "num_tokens": 62565285.0, "reward": 0.0, "reward_std": 1.0272464752197266, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.24254513753330645, "rewards/wordcountpos_reward/raw_geo/std": 0.2797745325025318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1173.1875, "completions/mean_terminated_length": 1151.4000244140625, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.28845769153830764, "frac_reward_zero_std": 0.0, "grad_norm": 3.4801179064641943, "kl": 0.0132904052734375, "learning_rate": 9.062984378472082e-07, "loss": -0.0191, "num_tokens": 62607680.0, "reward": 0.0, "reward_std": 0.47428542375564575, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3120862149079335, "rewards/wordcountpos_reward/raw_geo/std": 0.48378952234024997, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.16329931618554522, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1365.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1068.6875, "completions/mean_terminated_length": 1068.6875, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.28865773154630925, "frac_reward_zero_std": 0.0, "grad_norm": 3.1022020231140717, "kl": 0.01092529296875, "learning_rate": 9.061064155424049e-07, "loss": -0.0187, "num_tokens": 62647979.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9856134653091431, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019645325420136105, "rewards/wordcountpos_reward/raw_geo/std": 0.04902552675106132, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1224.9375, "completions/mean_terminated_length": 1206.60009765625, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.28885777155431086, "frac_reward_zero_std": 0.0, "grad_norm": 3.343545315805295, "kl": 0.013458251953125, "learning_rate": 9.059142195984962e-07, "loss": 0.0293, "num_tokens": 62696314.0, "reward": -7.450580596923828e-09, "reward_std": 1.0684583187103271, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08204080362934202, "rewards/wordcountpos_reward/raw_geo/std": 0.09430845216030127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1130.5625, "completions/mean_terminated_length": 1130.5625, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.28905781156231247, "frac_reward_zero_std": 0.0, "grad_norm": 2.4298549899077098, "kl": 0.01165771484375, "learning_rate": 9.057218501091981e-07, "loss": -0.0403, "num_tokens": 62736451.0, "reward": 0.0, "reward_std": 0.9954296350479126, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.030408648293848026, "rewards/wordcountpos_reward/raw_geo/std": 0.032268355705222525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857661, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1311.625, "completions/mean_terminated_length": 1284.71435546875, "completions/min_length": 1069.0, "completions/min_terminated_length": 1069.0, "epoch": 0.2892578515703141, "frac_reward_zero_std": 0.0, "grad_norm": 2.997163145207722, "kl": 0.01416015625, "learning_rate": 9.055293071683104e-07, "loss": -0.0076, "num_tokens": 62786861.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6685308814048767, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0958720921979418, "rewards/wordcountpos_reward/raw_geo/std": 0.12980651945744268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1006.4375, "completions/mean_terminated_length": 1006.4375, "completions/min_length": 621.0, "completions/min_terminated_length": 621.0, "epoch": 0.28945789157831564, "frac_reward_zero_std": 0.0, "grad_norm": 1.6744484309448755, "kl": 0.004062652587890625, "learning_rate": 9.053365908697181e-07, "loss": -0.0331, "num_tokens": 62834852.0, "reward": 0.0, "reward_std": 0.8366628885269165, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09478176851892338, "rewards/wordcountpos_reward/raw_geo/std": 0.0708232886416502, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.6208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1158.0625, "completions/mean_terminated_length": 1158.0625, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.28965793158631725, "frac_reward_zero_std": 0.0, "grad_norm": 3.5241780217148113, "kl": 0.0135040283203125, "learning_rate": 9.051437013073904e-07, "loss": 0.0245, "num_tokens": 62884965.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8428228497505188, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1069153750712083, "rewards/wordcountpos_reward/raw_geo/std": 0.12776473747042635, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1196.8125, "completions/mean_terminated_length": 1153.5, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.28985797159431886, "frac_reward_zero_std": 0.0, "grad_norm": 3.37114446028865, "kl": 0.01226806640625, "learning_rate": 9.049506385753814e-07, "loss": -0.0179, "num_tokens": 62944058.0, "reward": -4.470348358154297e-08, "reward_std": 0.9549105167388916, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03988080949283152, "rewards/wordcountpos_reward/raw_geo/std": 0.1618169811657405, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1069.0625, "completions/mean_terminated_length": 1069.0625, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.2900580116023205, "frac_reward_zero_std": 0.0, "grad_norm": 3.2739879996675487, "kl": 0.0138702392578125, "learning_rate": 9.047574027678293e-07, "loss": -0.0133, "num_tokens": 62980955.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9645575284957886, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08330779602296855, "rewards/wordcountpos_reward/raw_geo/std": 0.2037769178831789, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1114.75, "completions/mean_terminated_length": 1114.75, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.2902580516103221, "frac_reward_zero_std": 0.0, "grad_norm": 2.7936679022605118, "kl": 0.014312744140625, "learning_rate": 9.045639939789566e-07, "loss": -0.0282, "num_tokens": 63030951.0, "reward": 1.4901161193847656e-08, "reward_std": 0.7950387597084045, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10317419512022896, "rewards/wordcountpos_reward/raw_geo/std": 0.15865256278818526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1080.3125, "completions/mean_terminated_length": 1052.3333740234375, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.29045809161832364, "frac_reward_zero_std": 0.0, "grad_norm": 3.629558687933672, "kl": 0.0142364501953125, "learning_rate": 9.043704123030704e-07, "loss": -0.0064, "num_tokens": 63063604.0, "reward": -5.960464477539063e-08, "reward_std": 0.8700249195098877, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.34295059274188905, "rewards/wordcountpos_reward/raw_geo/std": 0.3944995856322822, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1165.0625, "completions/mean_terminated_length": 1087.769287109375, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.29065813162632526, "frac_reward_zero_std": 0.0, "grad_norm": 2.907760793327239, "kl": 0.0133056640625, "learning_rate": 9.04176657834562e-07, "loss": -0.0519, "num_tokens": 63115541.0, "reward": 5.960464477539063e-08, "reward_std": 0.7884068489074707, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07097501399150928, "rewards/wordcountpos_reward/raw_geo/std": 0.11136324509626805, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 1134.625, "completions/mean_terminated_length": 1134.625, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.29085817163432687, "frac_reward_zero_std": 0.0, "grad_norm": 2.530164969059736, "kl": 0.007476806640625, "learning_rate": 9.03982730667907e-07, "loss": 0.0071, "num_tokens": 63152255.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0467522144317627, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09472054558276298, "rewards/wordcountpos_reward/raw_geo/std": 0.0675707775882524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1397.6875, "completions/mean_terminated_length": 1383.071533203125, "completions/min_length": 1194.0, "completions/min_terminated_length": 1194.0, "epoch": 0.2910582116423285, "frac_reward_zero_std": 0.0, "grad_norm": 2.1387801673292866, "kl": 0.00675201416015625, "learning_rate": 9.037886308976651e-07, "loss": -0.0174, "num_tokens": 63202954.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9525010585784912, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1831876681146244, "rewards/wordcountpos_reward/raw_geo/std": 0.10197307117165429, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948506, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1089.9375, "completions/mean_terminated_length": 1089.9375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.2912582516503301, "frac_reward_zero_std": 0.0, "grad_norm": 3.3811752996676043, "kl": 0.0152740478515625, "learning_rate": 9.035943586184804e-07, "loss": 0.0015, "num_tokens": 63252449.0, "reward": 0.0, "reward_std": 0.41891640424728394, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1702985180822, "rewards/wordcountpos_reward/raw_geo/std": 0.16368960262095725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1339.1875, "completions/mean_terminated_length": 1339.1875, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.29145829165833165, "frac_reward_zero_std": 0.0, "grad_norm": 2.698494445690344, "kl": 0.01025390625, "learning_rate": 9.033999139250807e-07, "loss": -0.0086, "num_tokens": 63299236.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6506620645523071, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013557736177097943, "rewards/wordcountpos_reward/raw_geo/std": 0.028344943746016742, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1132.0, "completions/max_terminated_length": 1132.0, "completions/mean_length": 1006.0, "completions/mean_terminated_length": 1006.0, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.29165833166633326, "frac_reward_zero_std": 0.0, "grad_norm": 3.100998193962974, "kl": 0.0355072021484375, "learning_rate": 9.032052969122781e-07, "loss": -0.0035, "num_tokens": 63330620.0, "reward": 0.0, "reward_std": 0.6489824056625366, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1805165598164709, "rewards/wordcountpos_reward/raw_geo/std": 0.06382372104453993, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1199.4375, "completions/mean_terminated_length": 1179.4000244140625, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.29185837167433487, "frac_reward_zero_std": 0.0, "grad_norm": 3.386138669507454, "kl": 0.0152435302734375, "learning_rate": 9.030105076749689e-07, "loss": -0.0198, "num_tokens": 63376755.0, "reward": 0.0, "reward_std": 0.6886649131774902, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3210669576097273, "rewards/wordcountpos_reward/raw_geo/std": 0.15926616728538104, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.18856180831641267, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 953.1875, "completions/mean_terminated_length": 916.7333984375, "completions/min_length": 617.0, "completions/min_terminated_length": 617.0, "epoch": 0.2920584116823365, "frac_reward_zero_std": 0.0, "grad_norm": 3.59671685423396, "kl": 0.01483154296875, "learning_rate": 9.02815546308133e-07, "loss": -0.0531, "num_tokens": 63414678.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9387416839599609, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05132582857210057, "rewards/wordcountpos_reward/raw_geo/std": 0.08207116892225662, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1160.125, "completions/mean_terminated_length": 895.7777709960938, "completions/min_length": 615.0, "completions/min_terminated_length": 615.0, "epoch": 0.2922584516903381, "frac_reward_zero_std": 0.0, "grad_norm": 2.9322333753581513, "kl": 0.011627197265625, "learning_rate": 9.02620412906835e-07, "loss": -0.0558, "num_tokens": 63461128.0, "reward": 0.0, "reward_std": 0.7622572183609009, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13543631733557196, "rewards/wordcountpos_reward/raw_geo/std": 0.15255109929123636, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1019.125, "completions/mean_terminated_length": 950.4285888671875, "completions/min_length": 719.0, "completions/min_terminated_length": 719.0, "epoch": 0.29245849169833965, "frac_reward_zero_std": 0.0, "grad_norm": 3.282527783660248, "kl": 0.01265716552734375, "learning_rate": 9.024251075662222e-07, "loss": 0.0258, "num_tokens": 63506722.0, "reward": 0.0, "reward_std": 0.7570434808731079, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0019561692191171886, "rewards/wordcountpos_reward/raw_geo/std": 0.056019712513134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1420.3125, "completions/mean_terminated_length": 1358.3333740234375, "completions/min_length": 1103.0, "completions/min_terminated_length": 1103.0, "epoch": 0.29265853170634126, "frac_reward_zero_std": 0.0, "grad_norm": 2.60731084717123, "kl": 0.0098724365234375, "learning_rate": 9.022296303815266e-07, "loss": -0.0306, "num_tokens": 63560127.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9849776029586792, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.048802319442386874, "rewards/wordcountpos_reward/raw_geo/std": 0.12270412185010794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746353, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1071.0, "completions/max_terminated_length": 1071.0, "completions/mean_length": 974.375, "completions/mean_terminated_length": 974.375, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.2928585717143429, "frac_reward_zero_std": 0.0, "grad_norm": 3.2571216211718226, "kl": 0.01107025146484375, "learning_rate": 9.02033981448064e-07, "loss": -0.0006, "num_tokens": 63595317.0, "reward": 0.0, "reward_std": 0.795859694480896, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10935462328813718, "rewards/wordcountpos_reward/raw_geo/std": 0.09217429870174264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1078.5, "completions/mean_terminated_length": 1050.4000244140625, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.2930586117223445, "frac_reward_zero_std": 0.0, "grad_norm": 3.2872383343896336, "kl": 0.011322021484375, "learning_rate": 9.018381608612335e-07, "loss": 0.0025, "num_tokens": 63635301.0, "reward": 5.960464477539063e-08, "reward_std": 0.37604689598083496, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.212589338598644, "rewards/wordcountpos_reward/raw_geo/std": 0.13518994714939073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1140.1875, "completions/mean_terminated_length": 1140.1875, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.29325865173034604, "frac_reward_zero_std": 0.0, "grad_norm": 2.4272363560938714, "kl": 0.00698089599609375, "learning_rate": 9.016421687165179e-07, "loss": -0.0108, "num_tokens": 63670032.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0003583431243896, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07876896043200607, "rewards/wordcountpos_reward/raw_geo/std": 0.09919018675192615, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0873477511423713, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1008.75, "completions/mean_terminated_length": 976.0000610351562, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.29345869173834765, "frac_reward_zero_std": 0.0, "grad_norm": 3.7774867122739315, "kl": 0.0142974853515625, "learning_rate": 9.014460051094841e-07, "loss": -0.0315, "num_tokens": 63711076.0, "reward": 5.960464477539063e-08, "reward_std": 0.8494784832000732, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15095108051854517, "rewards/wordcountpos_reward/raw_geo/std": 0.25774572232462695, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1042.8125, "completions/mean_terminated_length": 1042.8125, "completions/min_length": 657.0, "completions/min_terminated_length": 657.0, "epoch": 0.29365873174634927, "frac_reward_zero_std": 0.0, "grad_norm": 3.2962966635665474, "kl": 0.0138092041015625, "learning_rate": 9.012496701357824e-07, "loss": -0.0341, "num_tokens": 63756673.0, "reward": -1.4901161193847656e-08, "reward_std": 1.053252100944519, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.029444167556084474, "rewards/wordcountpos_reward/raw_geo/std": 0.03939921715924935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 1040.875, "completions/mean_terminated_length": 1040.875, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.2938587717543509, "frac_reward_zero_std": 0.0, "grad_norm": 3.3214026164414063, "kl": 0.0142059326171875, "learning_rate": 9.010531638911465e-07, "loss": -0.0332, "num_tokens": 63793879.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0144805908203125, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015375730814412644, "rewards/wordcountpos_reward/raw_geo/std": 0.08224743062757722, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1052.5625, "completions/mean_terminated_length": 1052.5625, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.2940588117623525, "frac_reward_zero_std": 0.0, "grad_norm": 3.270719373495854, "kl": 0.01346588134765625, "learning_rate": 9.008564864713938e-07, "loss": -0.0328, "num_tokens": 63828504.0, "reward": 2.9802322387695312e-08, "reward_std": 0.46263664960861206, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11276991566135867, "rewards/wordcountpos_reward/raw_geo/std": 0.188062190156898, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1339.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1111.6875, "completions/mean_terminated_length": 1111.6875, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.29425885177035405, "frac_reward_zero_std": 0.0, "grad_norm": 3.611614343065168, "kl": 0.01434326171875, "learning_rate": 9.006596379724251e-07, "loss": -0.0378, "num_tokens": 63870835.0, "reward": 0.0, "reward_std": 0.873188316822052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3010946281655233, "rewards/wordcountpos_reward/raw_geo/std": 0.24577298687623822, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414601, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1338.3125, "completions/mean_terminated_length": 1241.300048828125, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.29445889177835566, "frac_reward_zero_std": 0.0, "grad_norm": 2.979333702855172, "kl": 0.0115509033203125, "learning_rate": 9.004626184902246e-07, "loss": 0.005, "num_tokens": 63915232.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4926319718360901, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09794124717938814, "rewards/wordcountpos_reward/raw_geo/std": 0.08788800550361218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792516, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1350.0, "completions/mean_terminated_length": 1281.8182373046875, "completions/min_length": 1108.0, "completions/min_terminated_length": 1108.0, "epoch": 0.29465893178635727, "frac_reward_zero_std": 0.0, "grad_norm": 2.4315544037012757, "kl": 0.00914764404296875, "learning_rate": 9.002654281208598e-07, "loss": 0.0206, "num_tokens": 63951000.0, "reward": -3.725290298461914e-09, "reward_std": 0.890720546245575, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06162392739483719, "rewards/wordcountpos_reward/raw_geo/std": 0.0862484596843867, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 982.4375, "completions/mean_terminated_length": 982.4375, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.2948589717943589, "frac_reward_zero_std": 0.0, "grad_norm": 3.3396659014262204, "kl": 0.0169525146484375, "learning_rate": 9.000680669604819e-07, "loss": -0.0131, "num_tokens": 63988367.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0485255718231201, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03739086537212785, "rewards/wordcountpos_reward/raw_geo/std": 0.09627541169934727, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1138550085106622, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 995.8125, "completions/mean_terminated_length": 995.8125, "completions/min_length": 748.0, "completions/min_terminated_length": 748.0, "epoch": 0.2950590118023605, "frac_reward_zero_std": 0.0, "grad_norm": 3.4118140314189827, "kl": 0.0121307373046875, "learning_rate": 8.998705351053248e-07, "loss": 0.0037, "num_tokens": 64027732.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7950841188430786, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06555033931700137, "rewards/wordcountpos_reward/raw_geo/std": 0.07034175343903533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1077.0625, "completions/mean_terminated_length": 1077.0625, "completions/min_length": 739.0, "completions/min_terminated_length": 739.0, "epoch": 0.29525905181036205, "frac_reward_zero_std": 0.0, "grad_norm": 3.431857491456502, "kl": 0.0131378173828125, "learning_rate": 8.996728326517062e-07, "loss": -0.0573, "num_tokens": 64077693.0, "reward": 0.0, "reward_std": 0.7284241914749146, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.013188125732718426, "rewards/wordcountpos_reward/raw_geo/std": 0.14356295921406018, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1133.5, "completions/mean_terminated_length": 1109.0667724609375, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.29545909181836366, "frac_reward_zero_std": 0.0, "grad_norm": 2.2527113721395042, "kl": 0.00705718994140625, "learning_rate": 8.994749596960264e-07, "loss": -0.0215, "num_tokens": 64129941.0, "reward": 0.0, "reward_std": 0.9835470914840698, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12699088070444914, "rewards/wordcountpos_reward/raw_geo/std": 0.05648089467838851, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 883.875, "completions/mean_terminated_length": 883.875, "completions/min_length": 628.0, "completions/min_terminated_length": 628.0, "epoch": 0.2956591318263653, "frac_reward_zero_std": 0.0, "grad_norm": 3.7440388674068736, "kl": 0.0107574462890625, "learning_rate": 8.992769163347695e-07, "loss": 0.0137, "num_tokens": 64167267.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8804106116294861, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01646649146547831, "rewards/wordcountpos_reward/raw_geo/std": 0.09721029831829282, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1167460047694551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1244.0, "completions/mean_terminated_length": 1226.933349609375, "completions/min_length": 1084.0, "completions/min_terminated_length": 1084.0, "epoch": 0.2958591718343669, "frac_reward_zero_std": 0.0, "grad_norm": 2.823684789413533, "kl": 0.01018524169921875, "learning_rate": 8.99078702664502e-07, "loss": 0.0069, "num_tokens": 64221379.0, "reward": 0.0, "reward_std": 0.7385225296020508, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23240210799676453, "rewards/wordcountpos_reward/raw_geo/std": 0.13867578949573683, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13877773329774218, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1149.0625, "completions/mean_terminated_length": 1125.666748046875, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.2960592118423685, "frac_reward_zero_std": 0.0, "grad_norm": 3.3930398243728552, "kl": 0.0128631591796875, "learning_rate": 8.98880318781874e-07, "loss": 0.0242, "num_tokens": 64261660.0, "reward": 7.450580596923828e-09, "reward_std": 0.7898614406585693, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.061337632571661374, "rewards/wordcountpos_reward/raw_geo/std": 0.04752054456784415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1251.8125, "completions/mean_terminated_length": 1216.357177734375, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.29625925185037005, "frac_reward_zero_std": 0.0, "grad_norm": 3.3341993468259337, "kl": 0.0147705078125, "learning_rate": 8.986817647836183e-07, "loss": -0.019, "num_tokens": 64308369.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7689966559410095, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07526627707586311, "rewards/wordcountpos_reward/raw_geo/std": 0.18979758190689364, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752093, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1067.4375, "completions/mean_terminated_length": 1067.4375, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.29645929185837167, "frac_reward_zero_std": 0.0, "grad_norm": 3.282611817015571, "kl": 0.010986328125, "learning_rate": 8.984830407665508e-07, "loss": -0.0003, "num_tokens": 64349400.0, "reward": 5.960464477539063e-08, "reward_std": 0.7352669835090637, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08747978256397394, "rewards/wordcountpos_reward/raw_geo/std": 0.09140865488882069, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1288.875, "completions/mean_terminated_length": 1218.5, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.2966593318663733, "frac_reward_zero_std": 0.0, "grad_norm": 3.543544277266936, "kl": 0.015106201171875, "learning_rate": 8.982841468275702e-07, "loss": -0.0366, "num_tokens": 64397334.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5321323871612549, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.3596431703313685, "rewards/wordcountpos_reward/raw_geo/std": 0.16833698207965983, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1092.3125, "completions/mean_terminated_length": 1092.3125, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.2968593718743749, "frac_reward_zero_std": 0.0, "grad_norm": 3.0396470994722247, "kl": 0.011993408203125, "learning_rate": 8.980850830636581e-07, "loss": 0.0154, "num_tokens": 64435579.0, "reward": 0.0, "reward_std": 0.8392201662063599, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13006627566445106, "rewards/wordcountpos_reward/raw_geo/std": 0.09917892053397387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1142.0625, "completions/mean_terminated_length": 1142.0625, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.2970594118823765, "frac_reward_zero_std": 0.0, "grad_norm": 2.631411584974668, "kl": 0.008941650390625, "learning_rate": 8.978858495718789e-07, "loss": -0.0484, "num_tokens": 64478340.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9899147152900696, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13299359940299732, "rewards/wordcountpos_reward/raw_geo/std": 0.05453504320482447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185827, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1084.0625, "completions/mean_terminated_length": 1024.6429443359375, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.29725945189037806, "frac_reward_zero_std": 0.0, "grad_norm": 2.639221648776102, "kl": 0.0066680908203125, "learning_rate": 8.976864464493796e-07, "loss": -0.0226, "num_tokens": 64522693.0, "reward": 0.0, "reward_std": 0.7909857034683228, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06510352959731772, "rewards/wordcountpos_reward/raw_geo/std": 0.09662416680032102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11925695879998881, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 1264.5, "completions/mean_terminated_length": 1157.45458984375, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.29745949189837967, "frac_reward_zero_std": 0.0, "grad_norm": 3.050306170018354, "kl": 0.0139923095703125, "learning_rate": 8.974868737933903e-07, "loss": -0.0164, "num_tokens": 64567949.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8124679327011108, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3103681704876814, "rewards/wordcountpos_reward/raw_geo/std": 0.0867463720802633, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1021.0, "completions/mean_terminated_length": 1021.0, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.2976595319063813, "frac_reward_zero_std": 0.0, "grad_norm": 2.834110721046252, "kl": 0.011505126953125, "learning_rate": 8.972871317012235e-07, "loss": 0.0315, "num_tokens": 64606853.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0578687191009521, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14952008359432423, "rewards/wordcountpos_reward/raw_geo/std": 0.1078846165093754, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1066.0625, "completions/mean_terminated_length": 1066.0625, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.2978595719143829, "frac_reward_zero_std": 0.0, "grad_norm": 2.635810518763671, "kl": 0.0156097412109375, "learning_rate": 8.970872202702742e-07, "loss": -0.0585, "num_tokens": 64640878.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9582006931304932, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08249479474259085, "rewards/wordcountpos_reward/raw_geo/std": 0.09163353459354723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1180.8125, "completions/mean_terminated_length": 1180.8125, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.2980596119223845, "frac_reward_zero_std": 0.0, "grad_norm": 3.220851269784932, "kl": 0.014892578125, "learning_rate": 8.968871395980201e-07, "loss": -0.0004, "num_tokens": 64688323.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8297221064567566, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11318538967370452, "rewards/wordcountpos_reward/raw_geo/std": 0.09185277935910437, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1204.0, "completions/mean_terminated_length": 1204.0, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.29825965193038606, "frac_reward_zero_std": 0.0, "grad_norm": 3.3368590393809066, "kl": 0.014312744140625, "learning_rate": 8.966868897820217e-07, "loss": -0.0322, "num_tokens": 64732811.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0384639501571655, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010900561255736942, "rewards/wordcountpos_reward/raw_geo/std": 0.13743541736268067, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1019.5, "completions/mean_terminated_length": 1019.5, "completions/min_length": 794.0, "completions/min_terminated_length": 794.0, "epoch": 0.2984596919383877, "frac_reward_zero_std": 0.0, "grad_norm": 3.5284182634042405, "kl": 0.0128631591796875, "learning_rate": 8.964864709199216e-07, "loss": 0.001, "num_tokens": 64766091.0, "reward": 0.0, "reward_std": 0.8546037673950195, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0560437308105306, "rewards/wordcountpos_reward/raw_geo/std": 0.061560006982438284, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1258.0, "completions/max_terminated_length": 1258.0, "completions/mean_length": 934.8125, "completions/mean_terminated_length": 934.8125, "completions/min_length": 738.0, "completions/min_terminated_length": 738.0, "epoch": 0.2986597319463893, "frac_reward_zero_std": 0.0, "grad_norm": 3.373517887641331, "kl": 0.0097808837890625, "learning_rate": 8.962858831094447e-07, "loss": -0.0251, "num_tokens": 64794240.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0567591190338135, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15887111797345885, "rewards/wordcountpos_reward/raw_geo/std": 0.07918070372071889, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1141.1875, "completions/mean_terminated_length": 1141.1875, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.2988597719543909, "frac_reward_zero_std": 0.0, "grad_norm": 3.3645402165793046, "kl": 0.015350341796875, "learning_rate": 8.960851264483989e-07, "loss": -0.0045, "num_tokens": 64840099.0, "reward": -4.470348358154297e-08, "reward_std": 0.9575287103652954, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18236224770281134, "rewards/wordcountpos_reward/raw_geo/std": 0.08057282423217577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1261.0625, "completions/mean_terminated_length": 1152.45458984375, "completions/min_length": 974.0, "completions/min_terminated_length": 974.0, "epoch": 0.29905981196239245, "frac_reward_zero_std": 0.0, "grad_norm": 2.92627235551055, "kl": 0.01119232177734375, "learning_rate": 8.95884201034674e-07, "loss": -0.0009, "num_tokens": 64884820.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9978117942810059, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0890794109788454, "rewards/wordcountpos_reward/raw_geo/std": 0.14328580255528114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12224747213928167, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1158.0, "completions/max_terminated_length": 1158.0, "completions/mean_length": 976.875, "completions/mean_terminated_length": 976.875, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.29925985197039406, "frac_reward_zero_std": 0.0, "grad_norm": 3.8188086695178147, "kl": 0.01422119140625, "learning_rate": 8.956831069662418e-07, "loss": -0.0015, "num_tokens": 64933074.0, "reward": 0.0, "reward_std": 1.0199470520019531, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027670575276743683, "rewards/wordcountpos_reward/raw_geo/std": 0.2272771345521765, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1172.5625, "completions/mean_terminated_length": 1172.5625, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.2994598919783957, "frac_reward_zero_std": 0.0, "grad_norm": 3.332621403984407, "kl": 0.0130462646484375, "learning_rate": 8.954818443411573e-07, "loss": -0.0075, "num_tokens": 64970147.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6304559111595154, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10435015607697648, "rewards/wordcountpos_reward/raw_geo/std": 0.05552716214907367, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 1080.1875, "completions/mean_terminated_length": 1080.1875, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.2996599319863973, "frac_reward_zero_std": 0.0, "grad_norm": 2.8890431651908743, "kl": 0.012115478515625, "learning_rate": 8.952804132575563e-07, "loss": -0.0333, "num_tokens": 65020622.0, "reward": -5.960464477539063e-08, "reward_std": 0.47698742151260376, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3057601877730295, "rewards/wordcountpos_reward/raw_geo/std": 0.2076816826229056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1268.3125, "completions/mean_terminated_length": 1214.84619140625, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.2998599719943989, "frac_reward_zero_std": 0.0, "grad_norm": 3.1205365609612774, "kl": 0.01275634765625, "learning_rate": 8.950788138136581e-07, "loss": 0.0104, "num_tokens": 65068131.0, "reward": 0.0, "reward_std": 0.351090669631958, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1163190974764945, "rewards/wordcountpos_reward/raw_geo/std": 0.2405127123551206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.17379212785308693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1153.0, "completions/max_terminated_length": 1153.0, "completions/mean_length": 1019.0, "completions/mean_terminated_length": 1019.0, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.30006001200240046, "frac_reward_zero_std": 0.0, "grad_norm": 3.223415423046262, "kl": 0.00958251953125, "learning_rate": 8.948770461077634e-07, "loss": -0.0221, "num_tokens": 65105851.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9944062232971191, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0954593473430376, "rewards/wordcountpos_reward/raw_geo/std": 0.04040350468251881, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1212.0, "completions/mean_terminated_length": 1116.0, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.30026005201040207, "frac_reward_zero_std": 0.0, "grad_norm": 3.4742908990646018, "kl": 0.016845703125, "learning_rate": 8.946751102382548e-07, "loss": -0.0141, "num_tokens": 65154067.0, "reward": 3.725290298461914e-08, "reward_std": 1.0562506914138794, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0051511624163113326, "rewards/wordcountpos_reward/raw_geo/std": 0.10285652826258446, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1232.5625, "completions/mean_terminated_length": 1232.5625, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.3004600920184037, "frac_reward_zero_std": 0.0, "grad_norm": 2.551717769585929, "kl": 0.00740814208984375, "learning_rate": 8.944730063035973e-07, "loss": 0.0178, "num_tokens": 65194148.0, "reward": 7.450580596923828e-09, "reward_std": 1.000548005104065, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11724307523568306, "rewards/wordcountpos_reward/raw_geo/std": 0.04635386610353036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1087.125, "completions/mean_terminated_length": 1087.125, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.3006601320264053, "frac_reward_zero_std": 0.0, "grad_norm": 3.3612995099052623, "kl": 0.016510009765625, "learning_rate": 8.942707344023379e-07, "loss": -0.0402, "num_tokens": 65237998.0, "reward": -7.450580596923828e-09, "reward_std": 1.0554802417755127, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.13163303920657526, "rewards/wordcountpos_reward/raw_geo/std": 0.050153446665678975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1250.5, "completions/mean_terminated_length": 1250.5, "completions/min_length": 1097.0, "completions/min_terminated_length": 1097.0, "epoch": 0.3008601720344069, "frac_reward_zero_std": 0.0, "grad_norm": 2.8500599488625764, "kl": 0.0106353759765625, "learning_rate": 8.940682946331049e-07, "loss": 0.0189, "num_tokens": 65282462.0, "reward": 0.0, "reward_std": 0.7060377597808838, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08134736297805265, "rewards/wordcountpos_reward/raw_geo/std": 0.18010497008739787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 986.375, "completions/mean_terminated_length": 952.1333618164062, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.30106021204240846, "frac_reward_zero_std": 0.0, "grad_norm": 2.8455644929712167, "kl": 0.0103759765625, "learning_rate": 8.938656870946092e-07, "loss": -0.0906, "num_tokens": 65318396.0, "reward": 0.0, "reward_std": 0.7076146006584167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06985231981779662, "rewards/wordcountpos_reward/raw_geo/std": 0.08359844855866497, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1025.3125, "completions/mean_terminated_length": 1025.3125, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.30126025205041007, "frac_reward_zero_std": 0.0, "grad_norm": 3.314180251340443, "kl": 0.013031005859375, "learning_rate": 8.936629118856429e-07, "loss": -0.024, "num_tokens": 65349169.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7110707759857178, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04682025849647551, "rewards/wordcountpos_reward/raw_geo/std": 0.05768349962498794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1041.875, "completions/mean_terminated_length": 1041.875, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.3014602920584117, "frac_reward_zero_std": 0.0, "grad_norm": 3.6436343324698814, "kl": 0.01800537109375, "learning_rate": 8.934599691050802e-07, "loss": -0.0181, "num_tokens": 65396655.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6815053224563599, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.057494124272892, "rewards/wordcountpos_reward/raw_geo/std": 0.11644629306887737, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.19163043135739746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1237.8125, "completions/mean_terminated_length": 1220.3333740234375, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.3016603320664133, "frac_reward_zero_std": 0.0, "grad_norm": 2.8102307430057945, "kl": 0.0093841552734375, "learning_rate": 8.932568588518771e-07, "loss": -0.0143, "num_tokens": 65440036.0, "reward": 0.0, "reward_std": 1.0306439399719238, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12947051946796104, "rewards/wordcountpos_reward/raw_geo/std": 0.09813034334319035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1323.25, "completions/mean_terminated_length": 1282.4615478515625, "completions/min_length": 1040.0, "completions/min_terminated_length": 1040.0, "epoch": 0.3018603720744149, "frac_reward_zero_std": 0.0, "grad_norm": 2.5604035559231155, "kl": 0.01171875, "learning_rate": 8.930535812250708e-07, "loss": -0.0361, "num_tokens": 65488600.0, "reward": 0.0, "reward_std": 0.48000064492225647, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03358847273707639, "rewards/wordcountpos_reward/raw_geo/std": 0.1420739152443631, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505566, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 971.625, "completions/mean_terminated_length": 971.625, "completions/min_length": 552.0, "completions/min_terminated_length": 552.0, "epoch": 0.30206041208241646, "frac_reward_zero_std": 0.0, "grad_norm": 3.506120609543339, "kl": 0.01434326171875, "learning_rate": 8.928501363237807e-07, "loss": 0.0035, "num_tokens": 65527330.0, "reward": 0.0, "reward_std": 0.8857091665267944, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15356173791888733, "rewards/wordcountpos_reward/raw_geo/std": 0.18465784116682463, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1059.25, "completions/mean_terminated_length": 1029.86669921875, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.3022604520904181, "frac_reward_zero_std": 0.0, "grad_norm": 3.639112194115988, "kl": 0.0157318115234375, "learning_rate": 8.926465242472071e-07, "loss": -0.0264, "num_tokens": 65571854.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9478989839553833, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14157868869823553, "rewards/wordcountpos_reward/raw_geo/std": 0.1972394817386133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1223.9375, "completions/mean_terminated_length": 1160.2308349609375, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.3024604920984197, "frac_reward_zero_std": 0.0, "grad_norm": 2.514041060851769, "kl": 0.011627197265625, "learning_rate": 8.924427450946324e-07, "loss": -0.0286, "num_tokens": 65625477.0, "reward": 0.0, "reward_std": 0.6899297833442688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.25997325963201795, "rewards/wordcountpos_reward/raw_geo/std": 0.15405689843927894, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 978.0625, "completions/mean_terminated_length": 978.0625, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.3026605321064213, "frac_reward_zero_std": 0.0, "grad_norm": 3.079938444146363, "kl": 0.014190673828125, "learning_rate": 8.922387989654202e-07, "loss": -0.0087, "num_tokens": 65661678.0, "reward": 0.0, "reward_std": 0.8159579634666443, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10778193063840877, "rewards/wordcountpos_reward/raw_geo/std": 0.17601365047533704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1267.1875, "completions/mean_terminated_length": 1233.9285888671875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.3028605721144229, "frac_reward_zero_std": 0.0, "grad_norm": 2.974711941704128, "kl": 0.014801025390625, "learning_rate": 8.920346859590154e-07, "loss": 0.0138, "num_tokens": 65709177.0, "reward": 0.0, "reward_std": 0.8008859157562256, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0703179232245362, "rewards/wordcountpos_reward/raw_geo/std": 0.11750775017975358, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1033.8125, "completions/mean_terminated_length": 1033.8125, "completions/min_length": 783.0, "completions/min_terminated_length": 783.0, "epoch": 0.30306061212242447, "frac_reward_zero_std": 0.0, "grad_norm": 3.5582209337970148, "kl": 0.0156707763671875, "learning_rate": 8.918304061749449e-07, "loss": 0.0098, "num_tokens": 65747190.0, "reward": -3.725290298461914e-09, "reward_std": 1.027848482131958, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1761510692932802, "rewards/wordcountpos_reward/raw_geo/std": 0.13253862335445202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1243.8125, "completions/mean_terminated_length": 1226.7333984375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.3032606521304261, "frac_reward_zero_std": 0.0, "grad_norm": 3.1865204098648148, "kl": 0.012603759765625, "learning_rate": 8.916259597128159e-07, "loss": -0.0257, "num_tokens": 65793603.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0383530855178833, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01912401168482963, "rewards/wordcountpos_reward/raw_geo/std": 0.03879880029825146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1035.125, "completions/mean_terminated_length": 1035.125, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.3034606921384277, "frac_reward_zero_std": 0.0, "grad_norm": 3.395770656261149, "kl": 0.013214111328125, "learning_rate": 8.914213466723177e-07, "loss": 0.0192, "num_tokens": 65826213.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6731513738632202, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06547951117638277, "rewards/wordcountpos_reward/raw_geo/std": 0.11998319624795317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 923.6875, "completions/mean_terminated_length": 923.6875, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.3036607321464293, "frac_reward_zero_std": 0.0, "grad_norm": 3.685675213474096, "kl": 0.014190673828125, "learning_rate": 8.912165671532204e-07, "loss": -0.0029, "num_tokens": 65863672.0, "reward": 7.450580596923828e-09, "reward_std": 0.9551426768302917, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.011890006927995342, "rewards/wordcountpos_reward/raw_geo/std": 0.1183979178812905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238707, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1174.4375, "completions/mean_terminated_length": 921.2222290039062, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.3038607721544309, "frac_reward_zero_std": 0.0, "grad_norm": 3.333695501125601, "kl": 0.0115203857421875, "learning_rate": 8.910116212553758e-07, "loss": 0.0078, "num_tokens": 65907455.0, "reward": 3.725290298461914e-09, "reward_std": 1.0634804964065552, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.07307470327684458, "rewards/wordcountpos_reward/raw_geo/std": 0.0725740455480212, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1295.6875, "completions/mean_terminated_length": 1248.5384521484375, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.30406081216243247, "frac_reward_zero_std": 0.0, "grad_norm": 3.408929085143891, "kl": 0.0147552490234375, "learning_rate": 8.90806509078716e-07, "loss": -0.0097, "num_tokens": 65954618.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0090022087097168, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008059746663650162, "rewards/wordcountpos_reward/raw_geo/std": 0.023627014241018925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1240.0, "completions/mean_terminated_length": 1153.3333740234375, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.3042608521704341, "frac_reward_zero_std": 0.0, "grad_norm": 3.3022804045739576, "kl": 0.0134124755859375, "learning_rate": 8.906012307232548e-07, "loss": -0.0373, "num_tokens": 66004442.0, "reward": 0.0, "reward_std": 0.38872620463371277, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08013572981636055, "rewards/wordcountpos_reward/raw_geo/std": 0.25926577963374786, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12285191326386657, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1173.0, "completions/max_terminated_length": 1173.0, "completions/mean_length": 1009.8125, "completions/mean_terminated_length": 1009.8125, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.3044608921784357, "frac_reward_zero_std": 0.0, "grad_norm": 3.688115607131627, "kl": 0.0160980224609375, "learning_rate": 8.903957862890869e-07, "loss": 0.0329, "num_tokens": 66043671.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0300618410110474, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07682258703412403, "rewards/wordcountpos_reward/raw_geo/std": 0.0510424403426051, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.11674600476945508, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1178.6875, "completions/mean_terminated_length": 1104.5384521484375, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.3046609321864373, "frac_reward_zero_std": 0.0, "grad_norm": 2.8687043930106912, "kl": 0.00933837890625, "learning_rate": 8.901901758763879e-07, "loss": 0.0163, "num_tokens": 66098746.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6993717551231384, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06032629119198504, "rewards/wordcountpos_reward/raw_geo/std": 0.2586133961228637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12224747213928165, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1083.1875, "completions/mean_terminated_length": 1083.1875, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.30486097219443886, "frac_reward_zero_std": 0.0, "grad_norm": 3.960019192816607, "kl": 0.015716552734375, "learning_rate": 8.899843995854142e-07, "loss": 0.0429, "num_tokens": 66144581.0, "reward": 0.0, "reward_std": 0.7789512872695923, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06869457204172164, "rewards/wordcountpos_reward/raw_geo/std": 0.15523565061403158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.131021626713557, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 968.875, "completions/mean_terminated_length": 968.875, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.3050610122024405, "frac_reward_zero_std": 0.0, "grad_norm": 3.0154508085098914, "kl": 0.01128387451171875, "learning_rate": 8.897784575165037e-07, "loss": -0.0026, "num_tokens": 66187843.0, "reward": -2.9802322387695312e-08, "reward_std": 0.3457336127758026, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20323528767081042, "rewards/wordcountpos_reward/raw_geo/std": 0.1391034346409706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.17126976771553507, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1202.75, "completions/mean_terminated_length": 1067.6363525390625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.3052610522104421, "frac_reward_zero_std": 0.0, "grad_norm": 3.5934913552454413, "kl": 0.015594482421875, "learning_rate": 8.895723497700743e-07, "loss": 0.0109, "num_tokens": 66229855.0, "reward": -5.960464477539063e-08, "reward_std": 0.8925884366035461, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11756340675372441, "rewards/wordcountpos_reward/raw_geo/std": 0.1757370496620371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1002773930432755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 997.6875, "completions/mean_terminated_length": 997.6875, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.3054610922184437, "frac_reward_zero_std": 0.0, "grad_norm": 3.1240051265169857, "kl": 0.0145721435546875, "learning_rate": 8.893660764466251e-07, "loss": 0.0152, "num_tokens": 66264674.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9766949415206909, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07169236743143663, "rewards/wordcountpos_reward/raw_geo/std": 0.07782348690095019, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1138.0, "completions/mean_terminated_length": 1113.86669921875, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.3056611322264453, "frac_reward_zero_std": 0.0, "grad_norm": 3.143491376853367, "kl": 0.0150299072265625, "learning_rate": 8.89159637646736e-07, "loss": -0.0021, "num_tokens": 66312594.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8591177463531494, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.014970841755525247, "rewards/wordcountpos_reward/raw_geo/std": 0.1220851380116241, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1277.0, "completions/mean_terminated_length": 1143.2000732421875, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.30586117223444687, "frac_reward_zero_std": 0.0, "grad_norm": 3.007880027015834, "kl": 0.0118560791015625, "learning_rate": 8.889530334710676e-07, "loss": 0.0448, "num_tokens": 66359434.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0181944370269775, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004598640879066152, "rewards/wordcountpos_reward/raw_geo/std": 0.10980499845200752, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1142.5625, "completions/mean_terminated_length": 1142.5625, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.3060612122424485, "frac_reward_zero_std": 0.0, "grad_norm": 2.7142540274490043, "kl": 0.00984954833984375, "learning_rate": 8.887462640203609e-07, "loss": 0.016, "num_tokens": 66409475.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8652361035346985, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09410256217471537, "rewards/wordcountpos_reward/raw_geo/std": 0.21772364414204134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 1027.25, "completions/mean_terminated_length": 1027.25, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.3062612522504501, "frac_reward_zero_std": 0.0, "grad_norm": 3.0904285943136074, "kl": 0.009723663330078125, "learning_rate": 8.885393293954377e-07, "loss": -0.0253, "num_tokens": 66440711.0, "reward": 2.9802322387695312e-08, "reward_std": 0.38162240386009216, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06683550399186128, "rewards/wordcountpos_reward/raw_geo/std": 0.13292134844170284, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1228.75, "completions/mean_terminated_length": 1228.75, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.3064612922584517, "frac_reward_zero_std": 0.0, "grad_norm": 2.354316043711627, "kl": 0.0064544677734375, "learning_rate": 8.883322296972001e-07, "loss": -0.0337, "num_tokens": 66492203.0, "reward": -7.450580596923828e-09, "reward_std": 0.9064786434173584, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.002148987847535119, "rewards/wordcountpos_reward/raw_geo/std": 0.06872546579897962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202952, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1219.9375, "completions/mean_terminated_length": 1201.2667236328125, "completions/min_length": 256.0, "completions/min_terminated_length": 256.0, "epoch": 0.3066613322664533, "frac_reward_zero_std": 0.0, "grad_norm": 2.7026574859399686, "kl": 0.0135498046875, "learning_rate": 8.881249650266311e-07, "loss": -0.1129, "num_tokens": 66547018.0, "reward": 0.0, "reward_std": 0.37324953079223633, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08052890528929858, "rewards/wordcountpos_reward/raw_geo/std": 0.08937977631022495, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1054.625, "completions/mean_terminated_length": 1054.625, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.30686137227445487, "frac_reward_zero_std": 0.0, "grad_norm": 3.2793679076034477, "kl": 0.0122222900390625, "learning_rate": 8.879175354847937e-07, "loss": -0.0789, "num_tokens": 66600492.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8591996431350708, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01607357559738295, "rewards/wordcountpos_reward/raw_geo/std": 0.03290865455955885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.16487930490266264, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1036.5625, "completions/mean_terminated_length": 1036.5625, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.3070614122824565, "frac_reward_zero_std": 0.0, "grad_norm": 2.989883769889972, "kl": 0.0112762451171875, "learning_rate": 8.877099411728314e-07, "loss": -0.0331, "num_tokens": 66642533.0, "reward": 0.0, "reward_std": 0.6208987236022949, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1308921608609547, "rewards/wordcountpos_reward/raw_geo/std": 0.15574081864194272, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1150.625, "completions/mean_terminated_length": 1150.625, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.3072614522904581, "frac_reward_zero_std": 0.0, "grad_norm": 2.953824449933008, "kl": 0.0125732421875, "learning_rate": 8.875021821919684e-07, "loss": -0.0374, "num_tokens": 66679439.0, "reward": 1.4901161193847656e-08, "reward_std": 1.026672124862671, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026093179943833793, "rewards/wordcountpos_reward/raw_geo/std": 0.07362271210631866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1171.3125, "completions/mean_terminated_length": 1149.4000244140625, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.3074614922984597, "frac_reward_zero_std": 0.0, "grad_norm": 2.9125444942937806, "kl": 0.0110015869140625, "learning_rate": 8.872942586435088e-07, "loss": -0.0154, "num_tokens": 66725084.0, "reward": 0.0, "reward_std": 0.8341853022575378, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0026693848646373117, "rewards/wordcountpos_reward/raw_geo/std": 0.16526720457345884, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1168.0625, "completions/mean_terminated_length": 1145.933349609375, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.3076615323064613, "frac_reward_zero_std": 0.0, "grad_norm": 2.8862942343229387, "kl": 0.0129852294921875, "learning_rate": 8.87086170628837e-07, "loss": -0.0406, "num_tokens": 66767885.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7962286472320557, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08119704825835708, "rewards/wordcountpos_reward/raw_geo/std": 0.06548641618818236, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1183.375, "completions/mean_terminated_length": 1162.2667236328125, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.3078615723144629, "frac_reward_zero_std": 0.0, "grad_norm": 3.1232281173726033, "kl": 0.01287841796875, "learning_rate": 8.868779182494178e-07, "loss": 0.003, "num_tokens": 66817035.0, "reward": 0.0, "reward_std": 0.4290623068809509, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.058969565918843836, "rewards/wordcountpos_reward/raw_geo/std": 0.2860248805638058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1107.25, "completions/mean_terminated_length": 1081.0667724609375, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.3080616123224645, "frac_reward_zero_std": 0.0, "grad_norm": 2.7581782686460237, "kl": 0.01030731201171875, "learning_rate": 8.86669501606796e-07, "loss": 0.0096, "num_tokens": 66865775.0, "reward": 0.0, "reward_std": 0.7750606536865234, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11571340762656603, "rewards/wordcountpos_reward/raw_geo/std": 0.09762077797368346, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1182.1875, "completions/mean_terminated_length": 1182.1875, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.3082616523304661, "frac_reward_zero_std": 0.0, "grad_norm": 3.152020457208807, "kl": 0.0146942138671875, "learning_rate": 8.864609208025962e-07, "loss": -0.0267, "num_tokens": 66916594.0, "reward": -7.450580596923828e-09, "reward_std": 1.0525057315826416, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0763243572763026, "rewards/wordcountpos_reward/raw_geo/std": 0.06960535562230581, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1028.5625, "completions/mean_terminated_length": 997.1333618164062, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.3084616923384677, "frac_reward_zero_std": 0.0, "grad_norm": 3.0722721447057095, "kl": 0.0120849609375, "learning_rate": 8.862521759385238e-07, "loss": 0.006, "num_tokens": 66959019.0, "reward": -5.960464477539063e-08, "reward_std": 0.6212877035140991, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009477141838253001, "rewards/wordcountpos_reward/raw_geo/std": 0.02136304086076395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13109227736669002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1059.75, "completions/mean_terminated_length": 1059.75, "completions/min_length": 644.0, "completions/min_terminated_length": 644.0, "epoch": 0.3086617323464693, "frac_reward_zero_std": 0.0, "grad_norm": 3.19483790587238, "kl": 0.0100860595703125, "learning_rate": 8.860432671163635e-07, "loss": -0.0138, "num_tokens": 66995599.0, "reward": 0.0, "reward_std": 1.036252498626709, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03399354742090029, "rewards/wordcountpos_reward/raw_geo/std": 0.03803052726939504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505421, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1121.0, "completions/mean_terminated_length": 1121.0, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.3088617723544709, "frac_reward_zero_std": 0.0, "grad_norm": 3.195248852339637, "kl": 0.0141448974609375, "learning_rate": 8.858341944379801e-07, "loss": 0.029, "num_tokens": 67046823.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9540793299674988, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013055438548569605, "rewards/wordcountpos_reward/raw_geo/std": 0.11409094403894339, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752094, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1185.75, "completions/mean_terminated_length": 1113.2308349609375, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.3090618123624725, "frac_reward_zero_std": 0.0, "grad_norm": 2.9243491131790127, "kl": 0.01214599609375, "learning_rate": 8.856249580053186e-07, "loss": -0.0005, "num_tokens": 67090947.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0064913034439087, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3221075759757471, "rewards/wordcountpos_reward/raw_geo/std": 0.08181261036086862, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1059.0, "completions/mean_terminated_length": 1059.0, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.3092618523704741, "frac_reward_zero_std": 0.0, "grad_norm": 3.2721800151200835, "kl": 0.011260986328125, "learning_rate": 8.854155579204036e-07, "loss": -0.0097, "num_tokens": 67125923.0, "reward": -1.862645149230957e-09, "reward_std": 1.0666723251342773, "rewards/wordcountpos_reward/mean": -1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.04273423305010233, "rewards/wordcountpos_reward/raw_geo/std": 0.03267786801817852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027816, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1107.0, "completions/mean_terminated_length": 1107.0, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.3094618923784757, "frac_reward_zero_std": 0.0, "grad_norm": 3.1833100717055838, "kl": 0.01153564453125, "learning_rate": 8.852059942853393e-07, "loss": -0.0333, "num_tokens": 67177739.0, "reward": 0.0, "reward_std": 0.753730833530426, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.021449551531011014, "rewards/wordcountpos_reward/raw_geo/std": 0.09060810907392117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1224.6875, "completions/mean_terminated_length": 1185.357177734375, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.30966193238647727, "frac_reward_zero_std": 0.0, "grad_norm": 2.9511033761526155, "kl": 0.02164459228515625, "learning_rate": 8.8499626720231e-07, "loss": 0.0404, "num_tokens": 67230806.0, "reward": 0.0, "reward_std": 0.5632149577140808, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.039873350935431394, "rewards/wordcountpos_reward/raw_geo/std": 0.1456775175156621, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.2511823890972307, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 999.875, "completions/mean_terminated_length": 999.875, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.3098619723944789, "frac_reward_zero_std": 0.0, "grad_norm": 3.058683076178571, "kl": 0.01165771484375, "learning_rate": 8.847863767735798e-07, "loss": 0.0017, "num_tokens": 67275404.0, "reward": 0.0, "reward_std": 0.6979461908340454, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08095004888482196, "rewards/wordcountpos_reward/raw_geo/std": 0.14020305632208324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1206.9375, "completions/mean_terminated_length": 1187.4000244140625, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.3100620124024805, "frac_reward_zero_std": 0.0, "grad_norm": 3.4435971821270046, "kl": 0.0122222900390625, "learning_rate": 8.84576323101492e-07, "loss": -0.0129, "num_tokens": 67321555.0, "reward": -2.9802322387695312e-08, "reward_std": 0.49310189485549927, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10908625314789303, "rewards/wordcountpos_reward/raw_geo/std": 0.22350127139590187, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869927, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1233.875, "completions/mean_terminated_length": 1074.2000732421875, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.3102620524104821, "frac_reward_zero_std": 0.0, "grad_norm": 2.2406333642590166, "kl": 0.0165252685546875, "learning_rate": 8.843661062884697e-07, "loss": -0.0069, "num_tokens": 67368185.0, "reward": 0.0, "reward_std": 0.963836669921875, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04149796524792276, "rewards/wordcountpos_reward/raw_geo/std": 0.07743742581845506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1520233900132184, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1440.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1191.125, "completions/mean_terminated_length": 1191.125, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.3104620924184837, "frac_reward_zero_std": 0.0, "grad_norm": 2.8252713112414907, "kl": 0.01031494140625, "learning_rate": 8.841557264370157e-07, "loss": -0.0293, "num_tokens": 67407827.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0579392910003662, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08114629525293365, "rewards/wordcountpos_reward/raw_geo/std": 0.1420111359841418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 1327.375, "completions/mean_terminated_length": 1193.111083984375, "completions/min_length": 1072.0, "completions/min_terminated_length": 1072.0, "epoch": 0.31066213242648527, "frac_reward_zero_std": 0.0, "grad_norm": 3.286008875953418, "kl": 0.0150604248046875, "learning_rate": 8.839451836497123e-07, "loss": -0.0001, "num_tokens": 67451497.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8361608982086182, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19903882233755354, "rewards/wordcountpos_reward/raw_geo/std": 0.21872693306155905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1219.75, "completions/mean_terminated_length": 1219.75, "completions/min_length": 1084.0, "completions/min_terminated_length": 1084.0, "epoch": 0.3108621724344869, "frac_reward_zero_std": 0.0, "grad_norm": 2.447575028712033, "kl": 0.0086212158203125, "learning_rate": 8.837344780292207e-07, "loss": -0.0388, "num_tokens": 67489741.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8923624753952026, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04062641953505993, "rewards/wordcountpos_reward/raw_geo/std": 0.16552705330602832, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1284.8125, "completions/mean_terminated_length": 1235.1539306640625, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.3110622124424885, "frac_reward_zero_std": 0.0, "grad_norm": 3.127903306648886, "kl": 0.0142974853515625, "learning_rate": 8.835236096782823e-07, "loss": -0.0748, "num_tokens": 67535466.0, "reward": 1.4901161193847656e-08, "reward_std": 1.044126033782959, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00795851783968677, "rewards/wordcountpos_reward/raw_geo/std": 0.16746145196721515, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1099.6875, "completions/mean_terminated_length": 1099.6875, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.3112622524504901, "frac_reward_zero_std": 0.0, "grad_norm": 3.089450921793083, "kl": 0.01044464111328125, "learning_rate": 8.833125786997172e-07, "loss": -0.0133, "num_tokens": 67584789.0, "reward": 0.0, "reward_std": 0.8578050136566162, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06936662673855049, "rewards/wordcountpos_reward/raw_geo/std": 0.14250272325423746, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387146, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1203.9375, "completions/mean_terminated_length": 1135.615478515625, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.3114622924584917, "frac_reward_zero_std": 0.0, "grad_norm": 2.8987471743600977, "kl": 0.01373291015625, "learning_rate": 8.831013851964253e-07, "loss": -0.0741, "num_tokens": 67626764.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9918359518051147, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12341565297831397, "rewards/wordcountpos_reward/raw_geo/std": 0.08525236833099371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 979.375, "completions/mean_terminated_length": 979.375, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.3116623324664933, "frac_reward_zero_std": 0.0, "grad_norm": 3.4869007134265724, "kl": 0.0156707763671875, "learning_rate": 8.828900292713852e-07, "loss": -0.0213, "num_tokens": 67673634.0, "reward": 0.0, "reward_std": 0.9104900360107422, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08684062188233006, "rewards/wordcountpos_reward/raw_geo/std": 0.046562681978747904, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1146.375, "completions/mean_terminated_length": 1146.375, "completions/min_length": 796.0, "completions/min_terminated_length": 796.0, "epoch": 0.3118623724744949, "frac_reward_zero_std": 0.0, "grad_norm": 2.9611884467169225, "kl": 0.0125732421875, "learning_rate": 8.826785110276554e-07, "loss": -0.0058, "num_tokens": 67713352.0, "reward": 2.9802322387695312e-08, "reward_std": 1.012331485748291, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0028908920609303883, "rewards/wordcountpos_reward/raw_geo/std": 0.10621303819614113, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1272.25, "completions/mean_terminated_length": 1257.0667724609375, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.3120624124824965, "frac_reward_zero_std": 0.0, "grad_norm": 3.132556069123932, "kl": 0.0156097412109375, "learning_rate": 8.824668305683727e-07, "loss": 0.0027, "num_tokens": 67763820.0, "reward": -5.960464477539063e-08, "reward_std": 0.6365565657615662, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.276049301693915, "rewards/wordcountpos_reward/raw_geo/std": 0.2256541879867406, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1107.3125, "completions/mean_terminated_length": 1107.3125, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.3122624524904981, "frac_reward_zero_std": 0.0, "grad_norm": 3.6476165619780776, "kl": 0.0171356201171875, "learning_rate": 8.822549879967542e-07, "loss": 0.0177, "num_tokens": 67810281.0, "reward": 0.0, "reward_std": 0.7430156469345093, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07537980442832835, "rewards/wordcountpos_reward/raw_geo/std": 0.1517052103644729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1223.0, "completions/mean_length": 1271.8125, "completions/mean_terminated_length": 1043.625, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.3124624924984997, "frac_reward_zero_std": 0.0, "grad_norm": 3.2532799207854755, "kl": 0.0138092041015625, "learning_rate": 8.820429834160944e-07, "loss": -0.0396, "num_tokens": 67864254.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6171025633811951, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013195082058544057, "rewards/wordcountpos_reward/raw_geo/std": 0.13006413643457318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457553, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1045.0, "completions/max_terminated_length": 1045.0, "completions/mean_length": 885.8125, "completions/mean_terminated_length": 885.8125, "completions/min_length": 679.0, "completions/min_terminated_length": 679.0, "epoch": 0.3126625325065013, "frac_reward_zero_std": 0.0, "grad_norm": 3.378178294980577, "kl": 0.01348876953125, "learning_rate": 8.818308169297683e-07, "loss": 0.0001, "num_tokens": 67904947.0, "reward": 0.0, "reward_std": 0.5333974361419678, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007400721060385203, "rewards/wordcountpos_reward/raw_geo/std": 0.08350638556766352, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1112.4375, "completions/mean_terminated_length": 1112.4375, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.3128625725145029, "frac_reward_zero_std": 0.0, "grad_norm": 3.1676097085993753, "kl": 0.0122528076171875, "learning_rate": 8.816184886412291e-07, "loss": -0.0213, "num_tokens": 67949738.0, "reward": -2.2351741790771484e-08, "reward_std": 1.002312421798706, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01510218146175955, "rewards/wordcountpos_reward/raw_geo/std": 0.07360919508933594, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1080.4375, "completions/mean_terminated_length": 1080.4375, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.3130626125225045, "frac_reward_zero_std": 0.0, "grad_norm": 3.5864291650125497, "kl": 0.0146026611328125, "learning_rate": 8.814059986540087e-07, "loss": 0.0437, "num_tokens": 67996721.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7108614444732666, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0257855742980834, "rewards/wordcountpos_reward/raw_geo/std": 0.04412411534207239, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 947.0625, "completions/mean_terminated_length": 947.0625, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.3132626525305061, "frac_reward_zero_std": 0.0, "grad_norm": 3.8995940613253968, "kl": 0.0153656005859375, "learning_rate": 8.811933470717187e-07, "loss": 0.0088, "num_tokens": 68047810.0, "reward": 2.60770320892334e-08, "reward_std": 1.0452021360397339, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10611139640300007, "rewards/wordcountpos_reward/raw_geo/std": 0.07339664949013859, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.26105200276601626, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1169.375, "completions/mean_terminated_length": 1122.1429443359375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.3134626925385077, "frac_reward_zero_std": 0.0, "grad_norm": 3.373046760086607, "kl": 0.01495361328125, "learning_rate": 8.809805339980489e-07, "loss": -0.0089, "num_tokens": 68098488.0, "reward": -2.9802322387695312e-08, "reward_std": 0.662135124206543, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0035310446599344653, "rewards/wordcountpos_reward/raw_geo/std": 0.10061880535202689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1205.9375, "completions/mean_terminated_length": 1186.3333740234375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.3136627325465093, "frac_reward_zero_std": 0.0, "grad_norm": 2.8816348634711813, "kl": 0.009613037109375, "learning_rate": 8.807675595367674e-07, "loss": 0.0374, "num_tokens": 68136703.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8111258149147034, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00523816750743297, "rewards/wordcountpos_reward/raw_geo/std": 0.1510539922485454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1235.125, "completions/mean_terminated_length": 1197.2857666015625, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.3138627725545109, "frac_reward_zero_std": 0.0, "grad_norm": 2.5103383488024105, "kl": 0.0102691650390625, "learning_rate": 8.805544237917222e-07, "loss": -0.0355, "num_tokens": 68183881.0, "reward": 0.0, "reward_std": 0.792202353477478, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.029927673717363802, "rewards/wordcountpos_reward/raw_geo/std": 0.12631156238380636, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1223.75, "completions/mean_terminated_length": 1223.75, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.3140628125625125, "frac_reward_zero_std": 0.0, "grad_norm": 2.8913291404074664, "kl": 0.010833740234375, "learning_rate": 8.803411268668387e-07, "loss": 0.0318, "num_tokens": 68233653.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8148121237754822, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07154118218644451, "rewards/wordcountpos_reward/raw_geo/std": 0.09725435575952426, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1125.0, "completions/max_terminated_length": 1125.0, "completions/mean_length": 942.9375, "completions/mean_terminated_length": 942.9375, "completions/min_length": 561.0, "completions/min_terminated_length": 561.0, "epoch": 0.3142628525705141, "frac_reward_zero_std": 0.0, "grad_norm": 2.4342270866702056, "kl": 0.005859375, "learning_rate": 8.801276688661217e-07, "loss": -0.0157, "num_tokens": 68272108.0, "reward": 0.0, "reward_std": 0.8952018618583679, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21659465905760503, "rewards/wordcountpos_reward/raw_geo/std": 0.027743311715286458, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1131.0, "completions/mean_terminated_length": 1131.0, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.31446289257851573, "frac_reward_zero_std": 0.0, "grad_norm": 4.002889922843912, "kl": 0.017974853515625, "learning_rate": 8.799140498936545e-07, "loss": -0.0396, "num_tokens": 68315004.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9004261493682861, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.017226014681493804, "rewards/wordcountpos_reward/raw_geo/std": 0.10736089592095367, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1152.0, "completions/mean_terminated_length": 1152.0, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.3146629325865173, "frac_reward_zero_std": 0.0, "grad_norm": 3.163317177855879, "kl": 0.01373291015625, "learning_rate": 8.797002700535984e-07, "loss": -0.0013, "num_tokens": 68359812.0, "reward": 5.960464477539063e-08, "reward_std": 0.4477081596851349, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15761104512120155, "rewards/wordcountpos_reward/raw_geo/std": 0.316092888680308, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1072.8125, "completions/mean_terminated_length": 1072.8125, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.3148629725945189, "frac_reward_zero_std": 0.0, "grad_norm": 3.3796532564270585, "kl": 0.0107879638671875, "learning_rate": 8.794863294501934e-07, "loss": 0.0348, "num_tokens": 68406281.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6505720615386963, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02912702659987597, "rewards/wordcountpos_reward/raw_geo/std": 0.07133172007978669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.2237723711142063, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 966.0625, "completions/mean_terminated_length": 966.0625, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.3150630126025205, "frac_reward_zero_std": 0.0, "grad_norm": 2.7691465082822346, "kl": 0.0117340087890625, "learning_rate": 8.792722281877581e-07, "loss": 0.0392, "num_tokens": 68450602.0, "reward": 0.0, "reward_std": 0.8999190330505371, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05927946503531806, "rewards/wordcountpos_reward/raw_geo/std": 0.05862487755436452, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194866, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1264.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 926.375, "completions/mean_terminated_length": 926.375, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.3152630526105221, "frac_reward_zero_std": 0.0, "grad_norm": 3.6246840133609366, "kl": 0.011627197265625, "learning_rate": 8.790579663706891e-07, "loss": -0.0282, "num_tokens": 68483480.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6915251016616821, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.001774806966841395, "rewards/wordcountpos_reward/raw_geo/std": 0.04894336106383481, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13270686158262923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 1060.0, "completions/mean_terminated_length": 1060.0, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.3154630926185237, "frac_reward_zero_std": 0.0, "grad_norm": 3.5684484214763876, "kl": 0.0152435302734375, "learning_rate": 8.788435441034614e-07, "loss": -0.0059, "num_tokens": 68526464.0, "reward": 3.725290298461914e-09, "reward_std": 0.9793384075164795, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.18088690006088792, "rewards/wordcountpos_reward/raw_geo/std": 0.1275818870379303, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1123.4375, "completions/mean_terminated_length": 1098.3333740234375, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.3156631326265253, "frac_reward_zero_std": 0.0, "grad_norm": 3.371664310057631, "kl": 0.0129241943359375, "learning_rate": 8.786289614906283e-07, "loss": -0.0548, "num_tokens": 68565519.0, "reward": 0.0, "reward_std": 0.9364562034606934, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07425712139114941, "rewards/wordcountpos_reward/raw_geo/std": 0.11593128228835065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1083.1875, "completions/mean_terminated_length": 1083.1875, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.3158631726345269, "frac_reward_zero_std": 0.0, "grad_norm": 2.3489322247254854, "kl": 0.0107269287109375, "learning_rate": 8.784142186368214e-07, "loss": -0.0461, "num_tokens": 68601922.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9669705629348755, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1522350613941733, "rewards/wordcountpos_reward/raw_geo/std": 0.45106490963562, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 1062.8125, "completions/mean_terminated_length": 1062.8125, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.3160632126425285, "frac_reward_zero_std": 0.0, "grad_norm": 2.9011049739712456, "kl": 0.00803375244140625, "learning_rate": 8.781993156467503e-07, "loss": -0.0159, "num_tokens": 68647423.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9258589148521423, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05050123528105946, "rewards/wordcountpos_reward/raw_geo/std": 0.16432188215439258, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1261.0625, "completions/mean_terminated_length": 1261.0625, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.3162632526505301, "frac_reward_zero_std": 0.0, "grad_norm": 3.2038415236144484, "kl": 0.012298583984375, "learning_rate": 8.779842526252024e-07, "loss": -0.0185, "num_tokens": 68689400.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7914847135543823, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06036636378091192, "rewards/wordcountpos_reward/raw_geo/std": 0.03642425582604647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1147.6875, "completions/mean_terminated_length": 1124.2000732421875, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.3164632926585317, "frac_reward_zero_std": 0.0, "grad_norm": 2.7746051305510844, "kl": 0.01210784912109375, "learning_rate": 8.777690296770437e-07, "loss": -0.0131, "num_tokens": 68731107.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8282698392868042, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1471876682459795, "rewards/wordcountpos_reward/raw_geo/std": 0.17817071582053842, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316811, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1186.4375, "completions/mean_terminated_length": 1043.9091796875, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.3166633326665333, "frac_reward_zero_std": 0.0, "grad_norm": 3.2264803830072477, "kl": 0.014312744140625, "learning_rate": 8.775536469072178e-07, "loss": 0.0346, "num_tokens": 68783738.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9534915089607239, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0039703081630581005, "rewards/wordcountpos_reward/raw_geo/std": 0.12009061968215361, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 1119.25, "completions/mean_terminated_length": 1093.86669921875, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.3168633726745349, "frac_reward_zero_std": 0.0, "grad_norm": 2.8959232476628824, "kl": 0.0111083984375, "learning_rate": 8.77338104420746e-07, "loss": -0.0051, "num_tokens": 68827926.0, "reward": 7.450580596923828e-09, "reward_std": 0.995486855506897, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.045305738994841416, "rewards/wordcountpos_reward/raw_geo/std": 0.07667984325277001, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337808, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 1011.25, "completions/mean_terminated_length": 1011.25, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.3170634126825365, "frac_reward_zero_std": 0.0, "grad_norm": 3.006943965417861, "kl": 0.012237548828125, "learning_rate": 8.771224023227284e-07, "loss": 0.0062, "num_tokens": 68868418.0, "reward": 0.0, "reward_std": 0.933379590511322, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.005277191377503794, "rewards/wordcountpos_reward/raw_geo/std": 0.08754166630403425, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1043.0, "completions/max_terminated_length": 1043.0, "completions/mean_length": 896.9375, "completions/mean_terminated_length": 896.9375, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.31726345269053813, "frac_reward_zero_std": 0.0, "grad_norm": 3.3322654254095223, "kl": 0.0125732421875, "learning_rate": 8.769065407183418e-07, "loss": -0.0165, "num_tokens": 68904241.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0416842699050903, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01716716227931385, "rewards/wordcountpos_reward/raw_geo/std": 0.09950324377868686, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1400.125, "completions/mean_terminated_length": 1271.71435546875, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.3174634926985397, "frac_reward_zero_std": 0.0, "grad_norm": 3.045622138192471, "kl": 0.0139923095703125, "learning_rate": 8.766905197128416e-07, "loss": -0.0079, "num_tokens": 68961563.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0298126935958862, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011343066012834035, "rewards/wordcountpos_reward/raw_geo/std": 0.2300492371618011, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1094.0, "completions/mean_terminated_length": 1066.933349609375, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.3176635327065413, "frac_reward_zero_std": 0.0, "grad_norm": 3.236366460327572, "kl": 0.0157928466796875, "learning_rate": 8.764743394115604e-07, "loss": 0.0143, "num_tokens": 69013195.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8382514715194702, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04073742505844089, "rewards/wordcountpos_reward/raw_geo/std": 0.032596749472514536, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 831.375, "completions/mean_terminated_length": 831.375, "completions/min_length": 639.0, "completions/min_terminated_length": 639.0, "epoch": 0.3178635727145429, "frac_reward_zero_std": 0.0, "grad_norm": 2.748882608834058, "kl": 0.00759124755859375, "learning_rate": 8.762579999199089e-07, "loss": 0.0093, "num_tokens": 69044889.0, "reward": 0.0, "reward_std": 1.0451180934906006, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1258962182967499, "rewards/wordcountpos_reward/raw_geo/std": 0.051868925157152156, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1166.75, "completions/mean_terminated_length": 1166.75, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.3180636127225445, "frac_reward_zero_std": 0.0, "grad_norm": 3.333627588331488, "kl": 0.0146942138671875, "learning_rate": 8.76041501343375e-07, "loss": -0.0155, "num_tokens": 69084357.0, "reward": -7.450580596923828e-09, "reward_std": 1.0352282524108887, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07563418210521654, "rewards/wordcountpos_reward/raw_geo/std": 0.0508009172809852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 967.25, "completions/mean_terminated_length": 967.25, "completions/min_length": 471.0, "completions/min_terminated_length": 471.0, "epoch": 0.31826365273054613, "frac_reward_zero_std": 0.0, "grad_norm": 3.6457553767546815, "kl": 0.0136566162109375, "learning_rate": 8.758248437875246e-07, "loss": 0.0606, "num_tokens": 69129553.0, "reward": 2.9802322387695312e-08, "reward_std": 0.2511444687843323, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04688763187245333, "rewards/wordcountpos_reward/raw_geo/std": 0.05714744585474582, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.20923139768633622, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1352.75, "completions/mean_terminated_length": 1331.71435546875, "completions/min_length": 1214.0, "completions/min_terminated_length": 1214.0, "epoch": 0.3184636927385477, "frac_reward_zero_std": 0.0, "grad_norm": 2.624934650990559, "kl": 0.010589599609375, "learning_rate": 8.756080273580009e-07, "loss": -0.0062, "num_tokens": 69175045.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7568929195404053, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11011449934728144, "rewards/wordcountpos_reward/raw_geo/std": 0.4128410334402734, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1091.9375, "completions/mean_terminated_length": 1033.6429443359375, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.3186637327465493, "frac_reward_zero_std": 0.0, "grad_norm": 3.686019771758005, "kl": 0.0162200927734375, "learning_rate": 8.753910521605245e-07, "loss": -0.0621, "num_tokens": 69224508.0, "reward": 0.0, "reward_std": 1.0114943981170654, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15080110303794583, "rewards/wordcountpos_reward/raw_geo/std": 0.11792134965310988, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1305.1875, "completions/mean_terminated_length": 1292.2000732421875, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.3188637727545509, "frac_reward_zero_std": 0.0, "grad_norm": 2.62190816484297, "kl": 0.0098114013671875, "learning_rate": 8.751739183008935e-07, "loss": -0.0184, "num_tokens": 69273975.0, "reward": -2.9802322387695312e-08, "reward_std": 0.797948956489563, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14550086044449423, "rewards/wordcountpos_reward/raw_geo/std": 0.1331604326108755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1156.375, "completions/mean_terminated_length": 1156.375, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.3190638127625525, "frac_reward_zero_std": 0.0, "grad_norm": 3.4193560472601034, "kl": 0.0166778564453125, "learning_rate": 8.749566258849833e-07, "loss": -0.0388, "num_tokens": 69318165.0, "reward": 0.0, "reward_std": 0.775354266166687, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18240270207601864, "rewards/wordcountpos_reward/raw_geo/std": 0.17912395436559866, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1227.9375, "completions/mean_terminated_length": 1227.9375, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.31926385277055414, "frac_reward_zero_std": 0.0, "grad_norm": 2.955087754613613, "kl": 0.0126953125, "learning_rate": 8.747391750187468e-07, "loss": -0.0106, "num_tokens": 69369980.0, "reward": 0.0, "reward_std": 0.9563160538673401, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10828293055163338, "rewards/wordcountpos_reward/raw_geo/std": 0.14969418019126313, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941137, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1197.375, "completions/mean_terminated_length": 1177.2000732421875, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.3194638927785557, "frac_reward_zero_std": 0.0, "grad_norm": 3.4810574774930365, "kl": 0.0149383544921875, "learning_rate": 8.745215658082138e-07, "loss": 0.0408, "num_tokens": 69419618.0, "reward": 0.0, "reward_std": 0.6060514450073242, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01379265701292679, "rewards/wordcountpos_reward/raw_geo/std": 0.20018001375998773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1328.0, "completions/max_terminated_length": 1328.0, "completions/mean_length": 1069.625, "completions/mean_terminated_length": 1069.625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.3196639327865573, "frac_reward_zero_std": 0.0, "grad_norm": 1.805656598014149, "kl": 0.0037021636962890625, "learning_rate": 8.743037983594917e-07, "loss": -0.0172, "num_tokens": 69461620.0, "reward": 0.0, "reward_std": 0.9250842928886414, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014396808622773056, "rewards/wordcountpos_reward/raw_geo/std": 0.05921011271343425, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1094.1875, "completions/mean_terminated_length": 1094.1875, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.3198639727945589, "frac_reward_zero_std": 0.0, "grad_norm": 3.4907672235794642, "kl": 0.0142059326171875, "learning_rate": 8.740858727787651e-07, "loss": 0.0329, "num_tokens": 69514847.0, "reward": 0.0, "reward_std": 0.7607910633087158, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.018264410001570718, "rewards/wordcountpos_reward/raw_geo/std": 0.04662311628043402, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1213.6875, "completions/mean_terminated_length": 1194.60009765625, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.3200640128025605, "frac_reward_zero_std": 0.0, "grad_norm": 2.8587935450601907, "kl": 0.012054443359375, "learning_rate": 8.738677891722951e-07, "loss": -0.042, "num_tokens": 69555946.0, "reward": 0.0, "reward_std": 0.8894298076629639, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.011683751771228656, "rewards/wordcountpos_reward/raw_geo/std": 0.15297810295497705, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1256.625, "completions/mean_terminated_length": 1175.5, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.32026405281056214, "frac_reward_zero_std": 0.0, "grad_norm": 3.253304689368561, "kl": 0.01470947265625, "learning_rate": 8.736495476464205e-07, "loss": -0.016, "num_tokens": 69609140.0, "reward": 0.0, "reward_std": 1.0084575414657593, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0862451665164458, "rewards/wordcountpos_reward/raw_geo/std": 0.07137471201934144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1318.25, "completions/mean_terminated_length": 1209.2000732421875, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.3204640928185637, "frac_reward_zero_std": 0.0, "grad_norm": 3.0458765620895685, "kl": 0.0158538818359375, "learning_rate": 8.734311483075568e-07, "loss": -0.0244, "num_tokens": 69657504.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7522756457328796, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010867540296603483, "rewards/wordcountpos_reward/raw_geo/std": 0.32075806524759004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1190.875, "completions/mean_terminated_length": 1170.2667236328125, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.3206641328265653, "frac_reward_zero_std": 0.0, "grad_norm": 2.5643541801729435, "kl": 0.012420654296875, "learning_rate": 8.732125912621966e-07, "loss": -0.0068, "num_tokens": 69701310.0, "reward": 0.0, "reward_std": 1.0112640857696533, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12665527476531674, "rewards/wordcountpos_reward/raw_geo/std": 0.10002495950164786, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1080980350662545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1038.4375, "completions/mean_terminated_length": 1038.4375, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.3208641728345669, "frac_reward_zero_std": 0.0, "grad_norm": 3.2329555949635624, "kl": 0.01313018798828125, "learning_rate": 8.729938766169092e-07, "loss": 0.0078, "num_tokens": 69740045.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5262272357940674, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02001174282360432, "rewards/wordcountpos_reward/raw_geo/std": 0.09209511922984874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1023.9375, "completions/mean_terminated_length": 1023.9375, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.32106421284256853, "frac_reward_zero_std": 0.0, "grad_norm": 3.7755137382972395, "kl": 0.016815185546875, "learning_rate": 8.727750044783408e-07, "loss": -0.0251, "num_tokens": 69780868.0, "reward": 0.0, "reward_std": 0.9580331444740295, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06635442620368658, "rewards/wordcountpos_reward/raw_geo/std": 0.05900486823426682, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 1177.4375, "completions/mean_terminated_length": 1131.357177734375, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.3212642528505701, "frac_reward_zero_std": 0.0, "grad_norm": 3.5682915419764543, "kl": 0.0165252685546875, "learning_rate": 8.725559749532145e-07, "loss": -0.0375, "num_tokens": 69834563.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6535353660583496, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2805919912373367, "rewards/wordcountpos_reward/raw_geo/std": 0.22273099265681917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1339.875, "completions/mean_terminated_length": 1179.75, "completions/min_length": 1096.0, "completions/min_terminated_length": 1096.0, "epoch": 0.3214642928585717, "frac_reward_zero_std": 0.0, "grad_norm": 2.8659518310960523, "kl": 0.009185791015625, "learning_rate": 8.723367881483301e-07, "loss": -0.0044, "num_tokens": 69891201.0, "reward": 0.0, "reward_std": 0.5730748176574707, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03574774335486767, "rewards/wordcountpos_reward/raw_geo/std": 0.09131875823854112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1198.5, "completions/mean_terminated_length": 1178.4000244140625, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.3216643328665733, "frac_reward_zero_std": 0.0, "grad_norm": 3.56234700133416, "kl": 0.015289306640625, "learning_rate": 8.721174441705642e-07, "loss": -0.0829, "num_tokens": 69926753.0, "reward": 0.0, "reward_std": 0.6940462589263916, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21455173226133972, "rewards/wordcountpos_reward/raw_geo/std": 0.15938293206465223, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1220.5625, "completions/mean_terminated_length": 1220.5625, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.3218643728745749, "frac_reward_zero_std": 0.0, "grad_norm": 2.5781193488214718, "kl": 0.009063720703125, "learning_rate": 8.718979431268698e-07, "loss": 0.0284, "num_tokens": 69977282.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8950475454330444, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01251881874843657, "rewards/wordcountpos_reward/raw_geo/std": 0.06520438036172192, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1230.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 937.25, "completions/mean_terminated_length": 937.25, "completions/min_length": 657.0, "completions/min_terminated_length": 657.0, "epoch": 0.32206441288257653, "frac_reward_zero_std": 0.0, "grad_norm": 3.160596004016877, "kl": 0.0093231201171875, "learning_rate": 8.716782851242766e-07, "loss": -0.0117, "num_tokens": 70012326.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8929026126861572, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09306294584262934, "rewards/wordcountpos_reward/raw_geo/std": 0.07288388681667722, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1289.8125, "completions/mean_terminated_length": 1275.800048828125, "completions/min_length": 1076.0, "completions/min_terminated_length": 1076.0, "epoch": 0.3222644528905781, "frac_reward_zero_std": 0.0, "grad_norm": 3.0742943218134227, "kl": 0.0139923095703125, "learning_rate": 8.714584702698908e-07, "loss": -0.0246, "num_tokens": 70058435.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6943762302398682, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10278817352013789, "rewards/wordcountpos_reward/raw_geo/std": 0.11470567582900348, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027818, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1036.8125, "completions/mean_terminated_length": 1036.8125, "completions/min_length": 739.0, "completions/min_terminated_length": 739.0, "epoch": 0.3224644928985797, "frac_reward_zero_std": 0.0, "grad_norm": 3.55281022296121, "kl": 0.014251708984375, "learning_rate": 8.712384986708953e-07, "loss": -0.0169, "num_tokens": 70107696.0, "reward": 0.0, "reward_std": 0.8547852039337158, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09444222534652028, "rewards/wordcountpos_reward/raw_geo/std": 0.0726889749413297, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1128.0, "completions/max_terminated_length": 1128.0, "completions/mean_length": 983.5, "completions/mean_terminated_length": 983.5, "completions/min_length": 716.0, "completions/min_terminated_length": 716.0, "epoch": 0.3226645329065813, "frac_reward_zero_std": 0.0, "grad_norm": 2.8657924909863124, "kl": 0.0098419189453125, "learning_rate": 8.710183704345492e-07, "loss": 0.0184, "num_tokens": 70159304.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0441851615905762, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07690630457244645, "rewards/wordcountpos_reward/raw_geo/std": 0.07906305037403856, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1131.125, "completions/mean_terminated_length": 1106.533447265625, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.3228645729145829, "frac_reward_zero_std": 0.0, "grad_norm": 4.207702987523146, "kl": 0.0517730712890625, "learning_rate": 8.707980856681878e-07, "loss": -0.0444, "num_tokens": 70200002.0, "reward": -1.862645149230957e-09, "reward_std": 0.9499379992485046, "rewards/wordcountpos_reward/mean": -1.862645149230957e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1420046854354127, "rewards/wordcountpos_reward/raw_geo/std": 0.12697468586952163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1031.0, "completions/mean_terminated_length": 1031.0, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.32306461292258454, "frac_reward_zero_std": 0.0, "grad_norm": 3.2384978616896927, "kl": 0.0124664306640625, "learning_rate": 8.705776444792232e-07, "loss": -0.0164, "num_tokens": 70234186.0, "reward": -3.725290298461914e-08, "reward_std": 1.052241325378418, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04186649685881302, "rewards/wordcountpos_reward/raw_geo/std": 0.13196762791416944, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1230.3125, "completions/mean_terminated_length": 1212.3333740234375, "completions/min_length": 1053.0, "completions/min_terminated_length": 1053.0, "epoch": 0.3232646529305861, "frac_reward_zero_std": 0.0, "grad_norm": 2.6076732046868845, "kl": 0.0121307373046875, "learning_rate": 8.703570469751433e-07, "loss": -0.0039, "num_tokens": 70284703.0, "reward": 0.0, "reward_std": 0.7809607982635498, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10899989717424802, "rewards/wordcountpos_reward/raw_geo/std": 0.12932489352066298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 959.3125, "completions/mean_terminated_length": 959.3125, "completions/min_length": 765.0, "completions/min_terminated_length": 765.0, "epoch": 0.3234646929385877, "frac_reward_zero_std": 0.0, "grad_norm": 3.087324989361525, "kl": 0.0136871337890625, "learning_rate": 8.701362932635128e-07, "loss": -0.0306, "num_tokens": 70322604.0, "reward": 7.450580596923828e-09, "reward_std": 1.03993821144104, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.25063173637291003, "rewards/wordcountpos_reward/raw_geo/std": 0.1810272407662527, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1177.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 884.75, "completions/mean_terminated_length": 884.75, "completions/min_length": 612.0, "completions/min_terminated_length": 612.0, "epoch": 0.3236647329465893, "frac_reward_zero_std": 0.0, "grad_norm": 3.8967115673213444, "kl": 0.02020263671875, "learning_rate": 8.699153834519718e-07, "loss": -0.0592, "num_tokens": 70365208.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6963033080101013, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11096943860833335, "rewards/wordcountpos_reward/raw_geo/std": 0.08204610233955069, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1283.9375, "completions/mean_terminated_length": 1269.533447265625, "completions/min_length": 1111.0, "completions/min_terminated_length": 1111.0, "epoch": 0.32386477295459093, "frac_reward_zero_std": 0.0, "grad_norm": 3.0067059218709997, "kl": 0.01409912109375, "learning_rate": 8.696943176482372e-07, "loss": 0.0043, "num_tokens": 70412895.0, "reward": 5.960464477539063e-08, "reward_std": 0.8461166620254517, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11866382241507653, "rewards/wordcountpos_reward/raw_geo/std": 0.0785102798917892, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414601, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1132.0, "completions/max_terminated_length": 1132.0, "completions/mean_length": 893.5, "completions/mean_terminated_length": 893.5, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.32406481296259254, "frac_reward_zero_std": 0.0, "grad_norm": 2.3749300901797596, "kl": 0.0144805908203125, "learning_rate": 8.694730959601017e-07, "loss": -0.1635, "num_tokens": 70447335.0, "reward": 0.0, "reward_std": 0.7249225974082947, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027121271229428656, "rewards/wordcountpos_reward/raw_geo/std": 0.1720343724491237, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.19958289839896937, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1416.125, "completions/mean_terminated_length": 1396.769287109375, "completions/min_length": 1248.0, "completions/min_terminated_length": 1248.0, "epoch": 0.3242648529705941, "frac_reward_zero_std": 0.0, "grad_norm": 2.3064195962591008, "kl": 0.0077362060546875, "learning_rate": 8.692517184954339e-07, "loss": 0.007, "num_tokens": 70494705.0, "reward": 0.0, "reward_std": 0.7915651798248291, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.030017199548312057, "rewards/wordcountpos_reward/raw_geo/std": 0.0761406002201284, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1026.9375, "completions/mean_terminated_length": 995.4000244140625, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.3244648929785957, "frac_reward_zero_std": 0.0, "grad_norm": 3.6844588554817648, "kl": 0.017303466796875, "learning_rate": 8.690301853621783e-07, "loss": 0.0072, "num_tokens": 70531288.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0639327764511108, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020742172151316927, "rewards/wordcountpos_reward/raw_geo/std": 0.13812770632466323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.15244914148902494, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1353.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 1079.3125, "completions/mean_terminated_length": 1079.3125, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.3246649329865973, "frac_reward_zero_std": 0.0, "grad_norm": 2.6415624385241765, "kl": 0.01348876953125, "learning_rate": 8.688084966683557e-07, "loss": -0.0148, "num_tokens": 70569341.0, "reward": 0.0, "reward_std": 0.7021139860153198, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0024738637670690487, "rewards/wordcountpos_reward/raw_geo/std": 0.07208491778762689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1198.75, "completions/mean_terminated_length": 1178.666748046875, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.32486497299459893, "frac_reward_zero_std": 0.0, "grad_norm": 3.1871723881040337, "kl": 0.0140838623046875, "learning_rate": 8.685866525220625e-07, "loss": 0.0193, "num_tokens": 70617985.0, "reward": 0.0, "reward_std": 0.991610050201416, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0026123139112771043, "rewards/wordcountpos_reward/raw_geo/std": 0.09322527614596396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.13743685418725538, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1162.0, "completions/mean_terminated_length": 1113.71435546875, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.32506501300260054, "frac_reward_zero_std": 0.0, "grad_norm": 2.8886941801296477, "kl": 0.0100250244140625, "learning_rate": 8.683646530314709e-07, "loss": 0.0243, "num_tokens": 70653497.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8914742469787598, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007382820731701609, "rewards/wordcountpos_reward/raw_geo/std": 0.07023277900114537, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.17018508443151817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1282.6875, "completions/mean_terminated_length": 1268.2000732421875, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.3252650530106021, "frac_reward_zero_std": 0.0, "grad_norm": 2.7038608058215643, "kl": 0.0116729736328125, "learning_rate": 8.681424983048288e-07, "loss": -0.0215, "num_tokens": 70703564.0, "reward": 0.0, "reward_std": 1.025007724761963, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.023557903369122793, "rewards/wordcountpos_reward/raw_geo/std": 0.2269531625116964, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1085.625, "completions/mean_terminated_length": 1085.625, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.3254650930186037, "frac_reward_zero_std": 0.0, "grad_norm": 2.870238551288568, "kl": 0.012054443359375, "learning_rate": 8.679201884504598e-07, "loss": 0.0303, "num_tokens": 70746406.0, "reward": -2.9802322387695312e-08, "reward_std": 0.811503529548645, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11347384029382013, "rewards/wordcountpos_reward/raw_geo/std": 0.27566224391343047, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 1053.6875, "completions/mean_terminated_length": 1053.6875, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.3256651330266053, "frac_reward_zero_std": 0.0, "grad_norm": 3.824436226209106, "kl": 0.018707275390625, "learning_rate": 8.676977235767632e-07, "loss": -0.0084, "num_tokens": 70782289.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9631739258766174, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.057504655034868894, "rewards/wordcountpos_reward/raw_geo/std": 0.05901597470434708, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1294.625, "completions/mean_terminated_length": 1265.2857666015625, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.32586517303460694, "frac_reward_zero_std": 0.0, "grad_norm": 1.8520271089329634, "kl": 0.00908660888671875, "learning_rate": 8.674751037922141e-07, "loss": -0.0413, "num_tokens": 70829155.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8651070594787598, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24366039398044062, "rewards/wordcountpos_reward/raw_geo/std": 0.13818310680879822, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620105, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1002.75, "completions/mean_terminated_length": 1002.75, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.32606521304260855, "frac_reward_zero_std": 0.0, "grad_norm": 4.481604806319879, "kl": 0.0293731689453125, "learning_rate": 8.672523292053627e-07, "loss": 0.0029, "num_tokens": 70877207.0, "reward": 5.960464477539063e-08, "reward_std": 0.8460835218429565, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09866266998428361, "rewards/wordcountpos_reward/raw_geo/std": 0.11878553207445729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1347.5, "completions/mean_terminated_length": 1278.181884765625, "completions/min_length": 1017.0, "completions/min_terminated_length": 1017.0, "epoch": 0.3262652530506101, "frac_reward_zero_std": 0.0, "grad_norm": 3.2739952464531283, "kl": 0.0124359130859375, "learning_rate": 8.670293999248351e-07, "loss": -0.0361, "num_tokens": 70931015.0, "reward": 0.0, "reward_std": 0.6473633050918579, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1593686448416754, "rewards/wordcountpos_reward/raw_geo/std": 0.17186180323953784, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1277.6875, "completions/mean_terminated_length": 1104.77783203125, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.3264652930586117, "frac_reward_zero_std": 0.0, "grad_norm": 2.380233976584486, "kl": 0.0093841552734375, "learning_rate": 8.668063160593323e-07, "loss": -0.0044, "num_tokens": 70970410.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8626672029495239, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.018328363456941933, "rewards/wordcountpos_reward/raw_geo/std": 0.051612424428809665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1285.625, "completions/mean_terminated_length": 1271.3333740234375, "completions/min_length": 1127.0, "completions/min_terminated_length": 1127.0, "epoch": 0.32666533306661333, "frac_reward_zero_std": 0.0, "grad_norm": 2.912150342870821, "kl": 0.0124359130859375, "learning_rate": 8.665830777176314e-07, "loss": 0.0005, "num_tokens": 71013988.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9188027381896973, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0988813874641122, "rewards/wordcountpos_reward/raw_geo/std": 0.0718615768363995, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1043.25, "completions/mean_terminated_length": 1012.800048828125, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.32686537307461494, "frac_reward_zero_std": 0.0, "grad_norm": 2.6431475752789555, "kl": 0.0120849609375, "learning_rate": 8.66359685008584e-07, "loss": 0.0248, "num_tokens": 71057240.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9554632902145386, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13324790943650425, "rewards/wordcountpos_reward/raw_geo/std": 0.13668713752796496, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1063.0625, "completions/mean_terminated_length": 1063.0625, "completions/min_length": 595.0, "completions/min_terminated_length": 595.0, "epoch": 0.3270654130826165, "frac_reward_zero_std": 0.0, "grad_norm": 3.1024808466646645, "kl": 0.0124053955078125, "learning_rate": 8.661361380411178e-07, "loss": -0.0439, "num_tokens": 71096953.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0119599103927612, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.005895189283164446, "rewards/wordcountpos_reward/raw_geo/std": 0.038449970513415484, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1145.3125, "completions/mean_terminated_length": 1121.666748046875, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.3272654530906181, "frac_reward_zero_std": 0.0, "grad_norm": 2.786977243972997, "kl": 0.01043701171875, "learning_rate": 8.65912436924235e-07, "loss": -0.0457, "num_tokens": 71138254.0, "reward": 0.0, "reward_std": 0.6566910743713379, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07098212426707291, "rewards/wordcountpos_reward/raw_geo/std": 0.14603928747891723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1055.625, "completions/mean_terminated_length": 1055.625, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.3274654930986197, "frac_reward_zero_std": 0.0, "grad_norm": 3.4816506170069794, "kl": 0.015167236328125, "learning_rate": 8.656885817670135e-07, "loss": 0.0012, "num_tokens": 71178344.0, "reward": 7.450580596923828e-09, "reward_std": 1.0401662588119507, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.026643690060561238, "rewards/wordcountpos_reward/raw_geo/std": 0.08655590093713117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1185.0, "completions/max_terminated_length": 1185.0, "completions/mean_length": 969.4375, "completions/mean_terminated_length": 969.4375, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.32766553310662133, "frac_reward_zero_std": 0.0, "grad_norm": 3.4958827301426596, "kl": 0.0151519775390625, "learning_rate": 8.654645726786061e-07, "loss": 0.0137, "num_tokens": 71216191.0, "reward": 0.0, "reward_std": 1.0296348333358765, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08525755774722532, "rewards/wordcountpos_reward/raw_geo/std": 0.13090007042257243, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05900408021045224, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 962.8125, "completions/mean_terminated_length": 962.8125, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.32786557311462294, "frac_reward_zero_std": 0.0, "grad_norm": 3.330979813222228, "kl": 0.013275146484375, "learning_rate": 8.652404097682405e-07, "loss": -0.0221, "num_tokens": 71247620.0, "reward": 0.0, "reward_std": 0.9763050079345703, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.034950029441197586, "rewards/wordcountpos_reward/raw_geo/std": 0.055553254731595295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 974.75, "completions/mean_terminated_length": 974.75, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.3280656131226245, "frac_reward_zero_std": 0.0, "grad_norm": 2.813197549012045, "kl": 0.009838104248046875, "learning_rate": 8.650160931452196e-07, "loss": -0.0919, "num_tokens": 71278136.0, "reward": 0.0, "reward_std": 0.9673024415969849, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027178198227300658, "rewards/wordcountpos_reward/raw_geo/std": 0.06780357145758219, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.133263870794973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1106.4375, "completions/mean_terminated_length": 1106.4375, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.3282656531306261, "frac_reward_zero_std": 0.0, "grad_norm": 3.4106662825780982, "kl": 0.0161590576171875, "learning_rate": 8.647916229189212e-07, "loss": 0.0031, "num_tokens": 71326951.0, "reward": 0.0, "reward_std": 1.0174760818481445, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14574068621798153, "rewards/wordcountpos_reward/raw_geo/std": 0.07431662759278944, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1218.1875, "completions/mean_terminated_length": 1218.1875, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.3284656931386277, "frac_reward_zero_std": 0.0, "grad_norm": 3.3698710533709826, "kl": 0.0153045654296875, "learning_rate": 8.645669991987981e-07, "loss": -0.0222, "num_tokens": 71372946.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7736269235610962, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08711889409657017, "rewards/wordcountpos_reward/raw_geo/std": 0.11308945630439972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1241.5625, "completions/mean_terminated_length": 1241.5625, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.32866573314662934, "frac_reward_zero_std": 0.0, "grad_norm": 3.5851789470653372, "kl": 0.016510009765625, "learning_rate": 8.643422220943778e-07, "loss": 0.0253, "num_tokens": 71422483.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0508407354354858, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.048591729561694816, "rewards/wordcountpos_reward/raw_geo/std": 0.23428493890429034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619462, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1171.0625, "completions/mean_terminated_length": 1124.071533203125, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.32886577315463095, "frac_reward_zero_std": 0.0, "grad_norm": 3.1974760306277523, "kl": 0.0152435302734375, "learning_rate": 8.641172917152626e-07, "loss": -0.0214, "num_tokens": 71474468.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0029001235961914, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10339928629604174, "rewards/wordcountpos_reward/raw_geo/std": 0.17249413143218512, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1137.625, "completions/mean_terminated_length": 1137.625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.3290658131626325, "frac_reward_zero_std": 0.0, "grad_norm": 2.666674517422969, "kl": 0.0094451904296875, "learning_rate": 8.638922081711295e-07, "loss": 0.0133, "num_tokens": 71526926.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0487515926361084, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07022498912745798, "rewards/wordcountpos_reward/raw_geo/std": 0.12518156411011677, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1226.8125, "completions/mean_terminated_length": 1226.8125, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.3292658531706341, "frac_reward_zero_std": 0.0, "grad_norm": 2.4692460036480885, "kl": 0.010894775390625, "learning_rate": 8.636669715717304e-07, "loss": -0.0106, "num_tokens": 71579091.0, "reward": 0.0, "reward_std": 0.8387613892555237, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17756178454481217, "rewards/wordcountpos_reward/raw_geo/std": 0.11850272511484324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1124.0, "completions/max_terminated_length": 1124.0, "completions/mean_length": 856.4375, "completions/mean_terminated_length": 856.4375, "completions/min_length": 571.0, "completions/min_terminated_length": 571.0, "epoch": 0.3294658931786357, "frac_reward_zero_std": 0.0, "grad_norm": 4.082142011814785, "kl": 0.019256591796875, "learning_rate": 8.634415820268915e-07, "loss": -0.0632, "num_tokens": 71616866.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0489314794540405, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04150882351501215, "rewards/wordcountpos_reward/raw_geo/std": 0.11087506226723201, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1273.75, "completions/mean_terminated_length": 1241.4285888671875, "completions/min_length": 1082.0, "completions/min_terminated_length": 1082.0, "epoch": 0.32966593318663734, "frac_reward_zero_std": 0.0, "grad_norm": 3.1454323935490933, "kl": 0.012237548828125, "learning_rate": 8.63216039646514e-07, "loss": 0.0119, "num_tokens": 71659750.0, "reward": 0.0, "reward_std": 0.4110639691352844, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1701982890269431, "rewards/wordcountpos_reward/raw_geo/std": 0.21120580793522808, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13877773329774218, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1215.875, "completions/mean_terminated_length": 1196.933349609375, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.32986597319463895, "frac_reward_zero_std": 0.0, "grad_norm": 3.140199414086592, "kl": 0.013336181640625, "learning_rate": 8.629903445405733e-07, "loss": 0.0313, "num_tokens": 71696484.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0495773553848267, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07906567484375807, "rewards/wordcountpos_reward/raw_geo/std": 0.10252425199351857, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1217.125, "completions/mean_terminated_length": 1217.125, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.3300660132026405, "frac_reward_zero_std": 0.0, "grad_norm": 3.013047334975712, "kl": 0.0157928466796875, "learning_rate": 8.627644968191195e-07, "loss": -0.0169, "num_tokens": 71738174.0, "reward": 0.0, "reward_std": 0.7353625893592834, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11402812388826357, "rewards/wordcountpos_reward/raw_geo/std": 0.15007952425869373, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1288.625, "completions/mean_terminated_length": 1274.533447265625, "completions/min_length": 1113.0, "completions/min_terminated_length": 1113.0, "epoch": 0.3302660532106421, "frac_reward_zero_std": 0.0, "grad_norm": 2.853452778109157, "kl": 0.013458251953125, "learning_rate": 8.625384965922767e-07, "loss": 0.0043, "num_tokens": 71791224.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9514033794403076, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05407114467535135, "rewards/wordcountpos_reward/raw_geo/std": 0.04370856550435641, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.23122059372591136, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1049.1875, "completions/mean_terminated_length": 1049.1875, "completions/min_length": 756.0, "completions/min_terminated_length": 756.0, "epoch": 0.33046609321864373, "frac_reward_zero_std": 0.0, "grad_norm": 3.6551948462596764, "kl": 0.015472412109375, "learning_rate": 8.623123439702435e-07, "loss": 0.053, "num_tokens": 71838875.0, "reward": 1.4901161193847656e-08, "reward_std": 1.035784125328064, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.20947465513796806, "rewards/wordcountpos_reward/raw_geo/std": 0.04648318660357534, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1146.9375, "completions/mean_terminated_length": 1065.4615478515625, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.33066613322664534, "frac_reward_zero_std": 0.0, "grad_norm": 3.419436734271246, "kl": 0.0164947509765625, "learning_rate": 8.620860390632935e-07, "loss": -0.0666, "num_tokens": 71879986.0, "reward": 0.0, "reward_std": 0.8505703210830688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15760804734126216, "rewards/wordcountpos_reward/raw_geo/std": 0.23067096221501385, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1017.5625, "completions/mean_terminated_length": 1017.5625, "completions/min_length": 703.0, "completions/min_terminated_length": 703.0, "epoch": 0.33086617323464695, "frac_reward_zero_std": 0.0, "grad_norm": 3.7391046197896953, "kl": 0.014678955078125, "learning_rate": 8.618595819817736e-07, "loss": -0.0155, "num_tokens": 71918491.0, "reward": 7.450580596923828e-09, "reward_std": 1.0449975728988647, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06934497790695482, "rewards/wordcountpos_reward/raw_geo/std": 0.0976157506498915, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1169.4375, "completions/mean_terminated_length": 1169.4375, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.3310662132426485, "frac_reward_zero_std": 0.0, "grad_norm": 3.093848017971346, "kl": 0.012908935546875, "learning_rate": 8.616329728361055e-07, "loss": -0.0028, "num_tokens": 71966698.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9457998871803284, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.049177681280332036, "rewards/wordcountpos_reward/raw_geo/std": 0.07021265181511138, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1248.625, "completions/mean_terminated_length": 1231.86669921875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.3312662532506501, "frac_reward_zero_std": 0.0, "grad_norm": 2.6797721300755004, "kl": 0.0099334716796875, "learning_rate": 8.614062117367846e-07, "loss": -0.0044, "num_tokens": 72017660.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9649055004119873, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12770360326382632, "rewards/wordcountpos_reward/raw_geo/std": 0.10364378276182498, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1095.9375, "completions/mean_terminated_length": 1095.9375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.33146629325865173, "frac_reward_zero_std": 0.0, "grad_norm": 3.6198036608850663, "kl": 0.0180816650390625, "learning_rate": 8.611792987943808e-07, "loss": -0.0333, "num_tokens": 72068851.0, "reward": 0.0, "reward_std": 0.9832199811935425, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.033657054734761806, "rewards/wordcountpos_reward/raw_geo/std": 0.21148635202019553, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1068.25, "completions/mean_terminated_length": 1039.4666748046875, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.33166633326665335, "frac_reward_zero_std": 0.0, "grad_norm": 2.7339064249219756, "kl": 0.0131072998046875, "learning_rate": 8.609522341195379e-07, "loss": 0.0261, "num_tokens": 72099087.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9827046394348145, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15818980448217593, "rewards/wordcountpos_reward/raw_geo/std": 0.2720681647812184, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1374.4375, "completions/mean_terminated_length": 1317.3636474609375, "completions/min_length": 1154.0, "completions/min_terminated_length": 1154.0, "epoch": 0.33186637327465496, "frac_reward_zero_std": 0.0, "grad_norm": 3.1705661389213136, "kl": 0.014862060546875, "learning_rate": 8.607250178229737e-07, "loss": 0.0129, "num_tokens": 72146886.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9550391435623169, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1279016708894423, "rewards/wordcountpos_reward/raw_geo/std": 0.13708704452196357, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1088.4375, "completions/mean_terminated_length": 1088.4375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.3320664132826565, "frac_reward_zero_std": 0.0, "grad_norm": 3.158068988013846, "kl": 0.01239013671875, "learning_rate": 8.604976500154799e-07, "loss": -0.0212, "num_tokens": 72189541.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0630125999450684, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.021816352598230483, "rewards/wordcountpos_reward/raw_geo/std": 0.07414967833763413, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1283.5625, "completions/mean_terminated_length": 1211.416748046875, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.3322664532906581, "frac_reward_zero_std": 0.0, "grad_norm": 3.45457921404488, "kl": 0.0171356201171875, "learning_rate": 8.602701308079217e-07, "loss": 0.0152, "num_tokens": 72242542.0, "reward": -5.960464477539063e-08, "reward_std": 0.8233770728111267, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03192580890506957, "rewards/wordcountpos_reward/raw_geo/std": 0.06079735098621222, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1202.375, "completions/mean_terminated_length": 1182.533447265625, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.33246649329865974, "frac_reward_zero_std": 0.0, "grad_norm": 3.03387048744309, "kl": 0.013702392578125, "learning_rate": 8.600424603112391e-07, "loss": 0.0431, "num_tokens": 72281532.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9889223575592041, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07276839249551871, "rewards/wordcountpos_reward/raw_geo/std": 0.07919626438968685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1178.0625, "completions/mean_terminated_length": 1178.0625, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.33266653330666135, "frac_reward_zero_std": 0.0, "grad_norm": 2.8906268944682085, "kl": 0.0111083984375, "learning_rate": 8.598146386364447e-07, "loss": -0.0403, "num_tokens": 72325597.0, "reward": -1.4901161193847656e-08, "reward_std": 1.067464828491211, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022731399097937005, "rewards/wordcountpos_reward/raw_geo/std": 0.11610339578448907, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1113.8125, "completions/mean_terminated_length": 1088.0667724609375, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.3328665733146629, "frac_reward_zero_std": 0.0, "grad_norm": 2.31868360429403, "kl": 0.009456634521484375, "learning_rate": 8.59586665894626e-07, "loss": 0.0196, "num_tokens": 72357778.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9441494941711426, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15478742742293902, "rewards/wordcountpos_reward/raw_geo/std": 0.09099926630773468, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 979.3125, "completions/mean_terminated_length": 979.3125, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.3330666133226645, "frac_reward_zero_std": 0.0, "grad_norm": 3.673043262217543, "kl": 0.01153564453125, "learning_rate": 8.59358542196943e-07, "loss": 0.0285, "num_tokens": 72391511.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9658478498458862, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10427420556801044, "rewards/wordcountpos_reward/raw_geo/std": 0.1122056485688583, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0709720863229836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1269.6875, "completions/mean_terminated_length": 1192.916748046875, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.33326665333066613, "frac_reward_zero_std": 0.0, "grad_norm": 2.3573459374820165, "kl": 0.01042938232421875, "learning_rate": 8.591302676546302e-07, "loss": 0.0106, "num_tokens": 72443762.0, "reward": 0.0, "reward_std": 0.9953340291976929, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12791598577088648, "rewards/wordcountpos_reward/raw_geo/std": 0.1395054680612004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1036.8125, "completions/mean_terminated_length": 1036.8125, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.33346669333866774, "frac_reward_zero_std": 0.0, "grad_norm": 3.0687246191093975, "kl": 0.00966644287109375, "learning_rate": 8.589018423789951e-07, "loss": 0.0199, "num_tokens": 72475327.0, "reward": -3.3527612686157227e-08, "reward_std": 1.0489134788513184, "rewards/wordcountpos_reward/mean": -3.3527612686157227e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07922752826829943, "rewards/wordcountpos_reward/raw_geo/std": 0.05048213869472424, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1068.0, "completions/mean_terminated_length": 1068.0, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.33366673334666935, "frac_reward_zero_std": 0.0, "grad_norm": 2.9688096576671072, "kl": 0.0144195556640625, "learning_rate": 8.586732664814189e-07, "loss": 0.0163, "num_tokens": 72520543.0, "reward": 0.0, "reward_std": 0.9736728668212891, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13445471339927012, "rewards/wordcountpos_reward/raw_geo/std": 0.11353455947446682, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 968.875, "completions/mean_terminated_length": 968.875, "completions/min_length": 724.0, "completions/min_terminated_length": 724.0, "epoch": 0.3338667733546709, "frac_reward_zero_std": 0.0, "grad_norm": 4.011029033161554, "kl": 0.02215576171875, "learning_rate": 8.584445400733564e-07, "loss": -0.0123, "num_tokens": 72568885.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9159786701202393, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19224829629672893, "rewards/wordcountpos_reward/raw_geo/std": 0.3139140649613301, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792518, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1436.75, "completions/mean_terminated_length": 1387.5555419921875, "completions/min_length": 1247.0, "completions/min_terminated_length": 1247.0, "epoch": 0.3340668133626725, "frac_reward_zero_std": 0.0, "grad_norm": 2.797773224926699, "kl": 0.0137176513671875, "learning_rate": 8.582156632663356e-07, "loss": 0.0125, "num_tokens": 72620753.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0462162494659424, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014076951822259275, "rewards/wordcountpos_reward/raw_geo/std": 0.09544848809223545, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 949.0, "completions/mean_terminated_length": 949.0, "completions/min_length": 533.0, "completions/min_terminated_length": 533.0, "epoch": 0.33426685337067413, "frac_reward_zero_std": 0.0, "grad_norm": 3.8084494983363233, "kl": 0.017730712890625, "learning_rate": 8.579866361719575e-07, "loss": -0.0403, "num_tokens": 72654745.0, "reward": 0.0, "reward_std": 0.6010411977767944, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07513890144512154, "rewards/wordcountpos_reward/raw_geo/std": 0.1124821009424362, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1244.0, "completions/mean_terminated_length": 1226.933349609375, "completions/min_length": 1078.0, "completions/min_terminated_length": 1078.0, "epoch": 0.33446689337867574, "frac_reward_zero_std": 0.0, "grad_norm": 3.195706676565709, "kl": 0.0156707763671875, "learning_rate": 8.577574589018974e-07, "loss": 0.0013, "num_tokens": 72704057.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8122002482414246, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10090319999582571, "rewards/wordcountpos_reward/raw_geo/std": 0.08906120160618294, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 1060.5625, "completions/mean_terminated_length": 1060.5625, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.33466693338667736, "frac_reward_zero_std": 0.0, "grad_norm": 1.9792483144032362, "kl": 0.006511688232421875, "learning_rate": 8.575281315679027e-07, "loss": -0.0146, "num_tokens": 72752170.0, "reward": 0.0, "reward_std": 0.8763498067855835, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.004752864950806888, "rewards/wordcountpos_reward/raw_geo/std": 0.10706151939937984, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1227.0, "completions/mean_length": 1255.125, "completions/mean_terminated_length": 1010.25, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.3348669733946789, "frac_reward_zero_std": 0.0, "grad_norm": 2.41506697906152, "kl": 0.0066127777099609375, "learning_rate": 8.572986542817948e-07, "loss": -0.0485, "num_tokens": 72794364.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5844442844390869, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09382704979471018, "rewards/wordcountpos_reward/raw_geo/std": 0.08939381503766418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1188.0625, "completions/mean_terminated_length": 1116.076904296875, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.3350670134026805, "frac_reward_zero_std": 0.0, "grad_norm": 2.7824489420780694, "kl": 0.011199951171875, "learning_rate": 8.570690271554674e-07, "loss": 0.0587, "num_tokens": 72840645.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9770321846008301, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09799653364933583, "rewards/wordcountpos_reward/raw_geo/std": 0.10747258111910908, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1020.0625, "completions/mean_terminated_length": 1020.0625, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.33526705341068214, "frac_reward_zero_std": 0.0, "grad_norm": 3.172385655854961, "kl": 0.017578125, "learning_rate": 8.56839250300888e-07, "loss": -0.0059, "num_tokens": 72872470.0, "reward": 2.60770320892334e-08, "reward_std": 1.0450825691223145, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010496641159481056, "rewards/wordcountpos_reward/raw_geo/std": 0.05656542586081801, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 948.875, "completions/mean_terminated_length": 948.875, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.33546709341868375, "frac_reward_zero_std": 0.0, "grad_norm": 3.5117266991860214, "kl": 0.01335906982421875, "learning_rate": 8.566093238300968e-07, "loss": -0.0167, "num_tokens": 72924500.0, "reward": -5.960464477539063e-08, "reward_std": 0.6853330135345459, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.021708956404889006, "rewards/wordcountpos_reward/raw_geo/std": 0.04958926849206862, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.1813529401164726, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1185.625, "completions/mean_terminated_length": 1164.666748046875, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.33566713342668536, "frac_reward_zero_std": 0.0, "grad_norm": 3.6184405195915135, "kl": 0.020050048828125, "learning_rate": 8.563792478552071e-07, "loss": -0.0289, "num_tokens": 72972318.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9075536727905273, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06601963105528937, "rewards/wordcountpos_reward/raw_geo/std": 0.09386959513664175, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1234.0625, "completions/mean_terminated_length": 1196.071533203125, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.3358671734346869, "frac_reward_zero_std": 0.0, "grad_norm": 2.9894226162165976, "kl": 0.01361083984375, "learning_rate": 8.561490224884049e-07, "loss": -0.0047, "num_tokens": 73021471.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7478127479553223, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2846949863287823, "rewards/wordcountpos_reward/raw_geo/std": 0.15734597264230835, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 950.5625, "completions/mean_terminated_length": 950.5625, "completions/min_length": 677.0, "completions/min_terminated_length": 677.0, "epoch": 0.33606721344268853, "frac_reward_zero_std": 0.0, "grad_norm": 3.489116698625775, "kl": 0.0149993896484375, "learning_rate": 8.559186478419492e-07, "loss": -0.0272, "num_tokens": 73067536.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4865996241569519, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12460323807770043, "rewards/wordcountpos_reward/raw_geo/std": 0.16504823488215692, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787749, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1166.1875, "completions/mean_terminated_length": 1166.1875, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.33626725345069014, "frac_reward_zero_std": 0.0, "grad_norm": 3.0307864938069398, "kl": 0.0145263671875, "learning_rate": 8.556881240281715e-07, "loss": -0.0103, "num_tokens": 73115451.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0014188289642334, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02608589558019482, "rewards/wordcountpos_reward/raw_geo/std": 0.08555588627798585, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1178.5, "completions/mean_terminated_length": 1157.0667724609375, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.33646729345869175, "frac_reward_zero_std": 0.0, "grad_norm": 2.8457920750430805, "kl": 0.010711669921875, "learning_rate": 8.554574511594766e-07, "loss": -0.0615, "num_tokens": 73163363.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9478765726089478, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05854922238833614, "rewards/wordcountpos_reward/raw_geo/std": 0.0657059204017052, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1086.375, "completions/mean_terminated_length": 1086.375, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.33666733346669336, "frac_reward_zero_std": 0.0, "grad_norm": 3.5279214912253614, "kl": 0.0228271484375, "learning_rate": 8.552266293483415e-07, "loss": -0.0277, "num_tokens": 73212097.0, "reward": 0.0, "reward_std": 0.6660162210464478, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.141658077100029, "rewards/wordcountpos_reward/raw_geo/std": 0.10384053885566608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1079.0625, "completions/mean_terminated_length": 1079.0625, "completions/min_length": 779.0, "completions/min_terminated_length": 779.0, "epoch": 0.3368673734746949, "frac_reward_zero_std": 0.0, "grad_norm": 3.6314944215100384, "kl": 0.016357421875, "learning_rate": 8.549956587073157e-07, "loss": -0.0604, "num_tokens": 73242778.0, "reward": 0.0, "reward_std": 0.6610418558120728, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06343283291496019, "rewards/wordcountpos_reward/raw_geo/std": 0.06877806093514871, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1259.625, "completions/mean_terminated_length": 1243.60009765625, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.33706741348269653, "frac_reward_zero_std": 0.0, "grad_norm": 2.3939930375939507, "kl": 0.013427734375, "learning_rate": 8.547645393490218e-07, "loss": 0.0446, "num_tokens": 73289444.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9426002502441406, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.042173102293494705, "rewards/wordcountpos_reward/raw_geo/std": 0.09454198432796888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1078.625, "completions/mean_terminated_length": 1018.4285888671875, "completions/min_length": 704.0, "completions/min_terminated_length": 704.0, "epoch": 0.33726745349069814, "frac_reward_zero_std": 0.0, "grad_norm": 3.2959746657704767, "kl": 0.0160369873046875, "learning_rate": 8.545332713861547e-07, "loss": -0.0589, "num_tokens": 73336646.0, "reward": 5.960464477539063e-08, "reward_std": 0.7477917671203613, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1673657800842179, "rewards/wordcountpos_reward/raw_geo/std": 0.12065034760598435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1157.4375, "completions/mean_terminated_length": 1001.727294921875, "completions/min_length": 633.0, "completions/min_terminated_length": 633.0, "epoch": 0.33746749349869976, "frac_reward_zero_std": 0.0, "grad_norm": 3.3433469983300785, "kl": 0.013946533203125, "learning_rate": 8.543018549314817e-07, "loss": 0.0257, "num_tokens": 73380509.0, "reward": 0.0, "reward_std": 0.8304761648178101, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09634061591515866, "rewards/wordcountpos_reward/raw_geo/std": 0.08681559871076935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1006.9375, "completions/mean_terminated_length": 1006.9375, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.33766753350670137, "frac_reward_zero_std": 0.0, "grad_norm": 3.5503258987273845, "kl": 0.0174102783203125, "learning_rate": 8.540702900978424e-07, "loss": -0.0212, "num_tokens": 73414732.0, "reward": -3.725290298461914e-09, "reward_std": 1.0035524368286133, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07777010897557533, "rewards/wordcountpos_reward/raw_geo/std": 0.06675779960420548, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14240006242195888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1236.5625, "completions/mean_terminated_length": 1219.0001220703125, "completions/min_length": 1015.0, "completions/min_terminated_length": 1015.0, "epoch": 0.3378675735147029, "frac_reward_zero_std": 0.0, "grad_norm": 2.5994790645175265, "kl": 0.010589599609375, "learning_rate": 8.538385769981488e-07, "loss": 0.0143, "num_tokens": 73465677.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6215928792953491, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008066809599905873, "rewards/wordcountpos_reward/raw_geo/std": 0.10528487473474683, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1288.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 1032.125, "completions/mean_terminated_length": 1032.125, "completions/min_length": 719.0, "completions/min_terminated_length": 719.0, "epoch": 0.33806761352270454, "frac_reward_zero_std": 0.0, "grad_norm": 3.255731032663374, "kl": 0.0111083984375, "learning_rate": 8.536067157453854e-07, "loss": 0.0023, "num_tokens": 73504783.0, "reward": 0.0, "reward_std": 0.9578646421432495, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04860542256463814, "rewards/wordcountpos_reward/raw_geo/std": 0.10861274109979441, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1209.0, "completions/max_terminated_length": 1209.0, "completions/mean_length": 1000.375, "completions/mean_terminated_length": 1000.375, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.33826765353070615, "frac_reward_zero_std": 0.0, "grad_norm": 3.486230219453571, "kl": 0.0286865234375, "learning_rate": 8.533747064526087e-07, "loss": 0.0086, "num_tokens": 73535605.0, "reward": 0.0, "reward_std": 1.0157032012939453, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.23042252016333845, "rewards/wordcountpos_reward/raw_geo/std": 0.22507826234505102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1125.4375, "completions/mean_terminated_length": 1100.4666748046875, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.33846769353870776, "frac_reward_zero_std": 0.0, "grad_norm": 3.529825517699508, "kl": 0.01629638671875, "learning_rate": 8.531425492329474e-07, "loss": -0.0021, "num_tokens": 73578620.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4763606786727905, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0594400785715604, "rewards/wordcountpos_reward/raw_geo/std": 0.16236111573416034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1476.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1205.3125, "completions/mean_terminated_length": 1205.3125, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.3386677335467093, "frac_reward_zero_std": 0.0, "grad_norm": 3.3472536656699363, "kl": 0.016021728515625, "learning_rate": 8.529102441996028e-07, "loss": -0.0059, "num_tokens": 73631113.0, "reward": 0.0, "reward_std": 0.9410836696624756, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08334570965803832, "rewards/wordcountpos_reward/raw_geo/std": 0.042396005404711064, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1047.875, "completions/mean_terminated_length": 1047.875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.3388677735547109, "frac_reward_zero_std": 0.0, "grad_norm": 3.174520829051601, "kl": 0.0142669677734375, "learning_rate": 8.526777914658475e-07, "loss": -0.0349, "num_tokens": 73672167.0, "reward": 0.0, "reward_std": 0.6043229699134827, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13830292763916593, "rewards/wordcountpos_reward/raw_geo/std": 0.08418298029182496, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1100.75, "completions/mean_terminated_length": 1074.1334228515625, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.33906781356271254, "frac_reward_zero_std": 0.0, "grad_norm": 3.53477459479101, "kl": 0.0135650634765625, "learning_rate": 8.524451911450268e-07, "loss": -0.0091, "num_tokens": 73706739.0, "reward": 7.450580596923828e-09, "reward_std": 1.000130295753479, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.054692457934656026, "rewards/wordcountpos_reward/raw_geo/std": 0.07604195782104117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1096.8125, "completions/mean_terminated_length": 1069.933349609375, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.33926785357071415, "frac_reward_zero_std": 0.0, "grad_norm": 3.285804865181038, "kl": 0.01416015625, "learning_rate": 8.522124433505574e-07, "loss": -0.0114, "num_tokens": 73738840.0, "reward": 0.0, "reward_std": 0.7916179895401001, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10394018309378934, "rewards/wordcountpos_reward/raw_geo/std": 0.14658636796849137, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1251.5625, "completions/mean_terminated_length": 1194.2308349609375, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.33946789357871576, "frac_reward_zero_std": 0.0, "grad_norm": 3.224388140361333, "kl": 0.0167694091796875, "learning_rate": 8.519795481959283e-07, "loss": -0.027, "num_tokens": 73784249.0, "reward": 0.0, "reward_std": 0.6722656488418579, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.029178315980885165, "rewards/wordcountpos_reward/raw_geo/std": 0.15951085033878243, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.18534252575124754, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1094.125, "completions/mean_terminated_length": 1094.125, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.3396679335867173, "frac_reward_zero_std": 0.0, "grad_norm": 3.0254649778119687, "kl": 0.0140228271484375, "learning_rate": 8.517465057947004e-07, "loss": -0.055, "num_tokens": 73824675.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8562289476394653, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03133500735859041, "rewards/wordcountpos_reward/raw_geo/std": 0.12597710423371797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 1059.375, "completions/mean_terminated_length": 1030.0, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.33986797359471893, "frac_reward_zero_std": 0.0, "grad_norm": 3.712921333004163, "kl": 0.01666259765625, "learning_rate": 8.51513316260506e-07, "loss": 0.0206, "num_tokens": 73862577.0, "reward": 0.0, "reward_std": 0.8229051232337952, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.028947265612873946, "rewards/wordcountpos_reward/raw_geo/std": 0.3676386182022282, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194866, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1107.1875, "completions/mean_terminated_length": 1107.1875, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.34006801360272054, "frac_reward_zero_std": 0.0, "grad_norm": 3.3042779721581548, "kl": 0.0140380859375, "learning_rate": 8.512799797070492e-07, "loss": -0.0341, "num_tokens": 73905756.0, "reward": 0.0, "reward_std": 0.9211150407791138, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06485066539210994, "rewards/wordcountpos_reward/raw_geo/std": 0.1920853186133484, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 943.0, "completions/mean_terminated_length": 943.0, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.34026805361072215, "frac_reward_zero_std": 0.0, "grad_norm": 3.916835334439108, "kl": 0.019866943359375, "learning_rate": 8.510464962481065e-07, "loss": -0.009, "num_tokens": 73947964.0, "reward": 0.0, "reward_std": 0.8240481019020081, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006151566162790921, "rewards/wordcountpos_reward/raw_geo/std": 0.04093682921242568, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1189459883650901, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1138.625, "completions/mean_terminated_length": 1114.533447265625, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.34046809361872377, "frac_reward_zero_std": 0.0, "grad_norm": 3.213841139030795, "kl": 0.015380859375, "learning_rate": 8.508128659975251e-07, "loss": 0.0058, "num_tokens": 73999462.0, "reward": 7.450580596923828e-09, "reward_std": 0.9345962405204773, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08579745101675412, "rewards/wordcountpos_reward/raw_geo/std": 0.1978860177849229, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1373.125, "completions/mean_terminated_length": 1246.25, "completions/min_length": 1186.0, "completions/min_terminated_length": 1186.0, "epoch": 0.3406681336267253, "frac_reward_zero_std": 0.0, "grad_norm": 2.721793437241765, "kl": 0.011199951171875, "learning_rate": 8.505790890692243e-07, "loss": 0.0242, "num_tokens": 74049808.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5063276290893555, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06569655842362646, "rewards/wordcountpos_reward/raw_geo/std": 0.07940958306986136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1081.375, "completions/mean_terminated_length": 1081.375, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.34086817363472693, "frac_reward_zero_std": 0.0, "grad_norm": 2.863599579829338, "kl": 0.01253509521484375, "learning_rate": 8.503451655771948e-07, "loss": 0.0339, "num_tokens": 74094214.0, "reward": 0.0, "reward_std": 0.6448909640312195, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06364293123674554, "rewards/wordcountpos_reward/raw_geo/std": 0.2746360791593829, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1046.0, "completions/mean_terminated_length": 1046.0, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.34106821364272855, "frac_reward_zero_std": 0.0, "grad_norm": 3.267563112490123, "kl": 0.01422119140625, "learning_rate": 8.501110956354988e-07, "loss": -0.0126, "num_tokens": 74129758.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9310984015464783, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0129437121561379, "rewards/wordcountpos_reward/raw_geo/std": 0.07333047372044844, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1097.875, "completions/mean_terminated_length": 1071.0667724609375, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.34126825365073016, "frac_reward_zero_std": 0.0, "grad_norm": 3.4757385019163225, "kl": 0.0163726806640625, "learning_rate": 8.498768793582696e-07, "loss": 0.022, "num_tokens": 74171484.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9566295146942139, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0771710115266427, "rewards/wordcountpos_reward/raw_geo/std": 0.07190161675473866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1274.625, "completions/mean_terminated_length": 1259.60009765625, "completions/min_length": 1050.0, "completions/min_terminated_length": 1050.0, "epoch": 0.34146829365873177, "frac_reward_zero_std": 0.0, "grad_norm": 3.1815076127478883, "kl": 0.0146331787109375, "learning_rate": 8.496425168597123e-07, "loss": 0.03, "num_tokens": 74218190.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9715590476989746, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10582743987064228, "rewards/wordcountpos_reward/raw_geo/std": 0.12909915050369855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1034.0, "completions/max_terminated_length": 1034.0, "completions/mean_length": 824.5625, "completions/mean_terminated_length": 824.5625, "completions/min_length": 507.0, "completions/min_terminated_length": 507.0, "epoch": 0.3416683336667333, "frac_reward_zero_std": 0.0, "grad_norm": 4.0636772608073155, "kl": 0.02032470703125, "learning_rate": 8.494080082541033e-07, "loss": -0.0273, "num_tokens": 74258807.0, "reward": 0.0, "reward_std": 1.055046558380127, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.041128203939138915, "rewards/wordcountpos_reward/raw_geo/std": 0.04960585220645909, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1216.1875, "completions/mean_terminated_length": 1216.1875, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.34186837367473494, "frac_reward_zero_std": 0.0, "grad_norm": 2.235041387731442, "kl": 0.0081634521484375, "learning_rate": 8.491733536557897e-07, "loss": -0.0322, "num_tokens": 74297890.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8786622285842896, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027645158851543762, "rewards/wordcountpos_reward/raw_geo/std": 0.13120135019487306, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1197.6875, "completions/mean_terminated_length": 1197.6875, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.34206841368273655, "frac_reward_zero_std": 0.0, "grad_norm": 3.593822411384142, "kl": 0.015533447265625, "learning_rate": 8.489385531791906e-07, "loss": -0.011, "num_tokens": 74337061.0, "reward": 0.0, "reward_std": 0.718343198299408, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13991598180600234, "rewards/wordcountpos_reward/raw_geo/std": 0.15815555024164615, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1200.75, "completions/mean_terminated_length": 1064.727294921875, "completions/min_length": 602.0, "completions/min_terminated_length": 602.0, "epoch": 0.34226845369073816, "frac_reward_zero_std": 0.0, "grad_norm": 3.1073338675302495, "kl": 0.01348876953125, "learning_rate": 8.487036069387953e-07, "loss": -0.0578, "num_tokens": 74388465.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0315226316452026, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022338886325019545, "rewards/wordcountpos_reward/raw_geo/std": 0.0532303633532224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1002.625, "completions/mean_terminated_length": 1002.625, "completions/min_length": 648.0, "completions/min_terminated_length": 648.0, "epoch": 0.3424684936987398, "frac_reward_zero_std": 0.0, "grad_norm": 3.5130571894412803, "kl": 0.0177001953125, "learning_rate": 8.484685150491649e-07, "loss": -0.0516, "num_tokens": 74440491.0, "reward": 7.450580596923828e-09, "reward_std": 1.0216412544250488, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.04038896652015001, "rewards/wordcountpos_reward/raw_geo/std": 0.0636367448765158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1194.5625, "completions/mean_terminated_length": 1194.5625, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.34266853370674133, "frac_reward_zero_std": 0.0, "grad_norm": 3.4022515331729477, "kl": 0.01947021484375, "learning_rate": 8.482332776249314e-07, "loss": -0.0066, "num_tokens": 74489412.0, "reward": 0.0, "reward_std": 0.916829526424408, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06301875877379064, "rewards/wordcountpos_reward/raw_geo/std": 0.1003141807825282, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1057600358603626, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1040.125, "completions/mean_terminated_length": 1009.4667358398438, "completions/min_length": 673.0, "completions/min_terminated_length": 673.0, "epoch": 0.34286857371474294, "frac_reward_zero_std": 0.0, "grad_norm": 3.4498298391737947, "kl": 0.0135040283203125, "learning_rate": 8.479978947807975e-07, "loss": 0.0049, "num_tokens": 74534982.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4400555193424225, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05426460704411065, "rewards/wordcountpos_reward/raw_geo/std": 0.24999964137988331, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14395215254459456, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1216.5625, "completions/mean_terminated_length": 1122.0833740234375, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.34306861372274455, "frac_reward_zero_std": 0.0, "grad_norm": 3.64906445105514, "kl": 0.01861572265625, "learning_rate": 8.477623666315367e-07, "loss": 0.0323, "num_tokens": 74586655.0, "reward": 0.0, "reward_std": 0.8644471168518066, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1158785271774045, "rewards/wordcountpos_reward/raw_geo/std": 0.13817021228847876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1105.625, "completions/mean_terminated_length": 1049.2857666015625, "completions/min_length": 650.0, "completions/min_terminated_length": 650.0, "epoch": 0.34326865373074616, "frac_reward_zero_std": 0.0, "grad_norm": 3.4423004432671944, "kl": 0.0144805908203125, "learning_rate": 8.475266932919938e-07, "loss": 0.0119, "num_tokens": 74626809.0, "reward": 0.0, "reward_std": 0.8821020126342773, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10427966120452759, "rewards/wordcountpos_reward/raw_geo/std": 0.07495030962347883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1153.0, "completions/max_terminated_length": 1153.0, "completions/mean_length": 963.0, "completions/mean_terminated_length": 963.0, "completions/min_length": 703.0, "completions/min_terminated_length": 703.0, "epoch": 0.3434686937387478, "frac_reward_zero_std": 0.0, "grad_norm": 2.9666809465990736, "kl": 0.010528564453125, "learning_rate": 8.472908748770844e-07, "loss": -0.0108, "num_tokens": 74655441.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0130188465118408, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09752729899777007, "rewards/wordcountpos_reward/raw_geo/std": 0.1137234694640579, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1114.625, "completions/mean_terminated_length": 1114.625, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.34366873374674933, "frac_reward_zero_std": 0.0, "grad_norm": 3.497303787984739, "kl": 0.01800537109375, "learning_rate": 8.470549115017944e-07, "loss": 0.0256, "num_tokens": 74710075.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9461817741394043, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05622239683286784, "rewards/wordcountpos_reward/raw_geo/std": 0.08157377962975434, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15939701191492708, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1295.9375, "completions/mean_terminated_length": 1203.181884765625, "completions/min_length": 1014.0, "completions/min_terminated_length": 1014.0, "epoch": 0.34386877375475094, "frac_reward_zero_std": 0.0, "grad_norm": 3.179209510266924, "kl": 0.01763916015625, "learning_rate": 8.468188032811806e-07, "loss": 0.0297, "num_tokens": 74755018.0, "reward": 2.9802322387695312e-08, "reward_std": 0.37994059920310974, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1355370921292624, "rewards/wordcountpos_reward/raw_geo/std": 0.21804350176150675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1220.75, "completions/mean_terminated_length": 1220.75, "completions/min_length": 1066.0, "completions/min_terminated_length": 1066.0, "epoch": 0.34406881376275256, "frac_reward_zero_std": 0.0, "grad_norm": 2.5203248021143954, "kl": 0.0106048583984375, "learning_rate": 8.465825503303705e-07, "loss": 0.0075, "num_tokens": 74799982.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0460631847381592, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012841327235359567, "rewards/wordcountpos_reward/raw_geo/std": 0.21718124609126793, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1268.375, "completions/mean_terminated_length": 1252.933349609375, "completions/min_length": 953.0, "completions/min_terminated_length": 953.0, "epoch": 0.34426885377075417, "frac_reward_zero_std": 0.0, "grad_norm": 2.777431283159697, "kl": 0.0111236572265625, "learning_rate": 8.463461527645621e-07, "loss": -0.0117, "num_tokens": 74841892.0, "reward": -2.9802322387695312e-08, "reward_std": 1.00283944606781, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0600485330930281, "rewards/wordcountpos_reward/raw_geo/std": 0.049325751004679286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1093.75, "completions/mean_terminated_length": 1093.75, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.3444688937787557, "frac_reward_zero_std": 0.0, "grad_norm": 3.63843566419292, "kl": 0.015838623046875, "learning_rate": 8.461096106990241e-07, "loss": -0.0018, "num_tokens": 74878824.0, "reward": 0.0, "reward_std": 0.7780653238296509, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061234134472009136, "rewards/wordcountpos_reward/raw_geo/std": 0.29139884303553054, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1196.125, "completions/mean_terminated_length": 1196.125, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.34466893378675734, "frac_reward_zero_std": 0.0, "grad_norm": 2.6202245138556237, "kl": 0.0119781494140625, "learning_rate": 8.458729242490951e-07, "loss": 0.0139, "num_tokens": 74922882.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8111221790313721, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.056166992934110024, "rewards/wordcountpos_reward/raw_geo/std": 0.09104621663299794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 896.625, "completions/mean_terminated_length": 896.625, "completions/min_length": 504.0, "completions/min_terminated_length": 504.0, "epoch": 0.34486897379475895, "frac_reward_zero_std": 0.0, "grad_norm": 4.12774794273736, "kl": 0.016754150390625, "learning_rate": 8.456360935301849e-07, "loss": 0.0251, "num_tokens": 74973724.0, "reward": -7.450580596923828e-09, "reward_std": 0.9670534133911133, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.008813059183389977, "rewards/wordcountpos_reward/raw_geo/std": 0.017488180877025798, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12412657816683505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1261.875, "completions/mean_terminated_length": 1227.857177734375, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.34506901380276056, "frac_reward_zero_std": 0.0, "grad_norm": 2.359533574364688, "kl": 0.0131378173828125, "learning_rate": 8.453991186577727e-07, "loss": -0.0105, "num_tokens": 75027362.0, "reward": 0.0, "reward_std": 0.824561357498169, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16705782248396822, "rewards/wordcountpos_reward/raw_geo/std": 0.2882327883423054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1147.8125, "completions/mean_terminated_length": 1147.8125, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.34526905381076217, "frac_reward_zero_std": 0.0, "grad_norm": 3.4565125891817323, "kl": 0.0194091796875, "learning_rate": 8.451619997474093e-07, "loss": 0.0031, "num_tokens": 75078711.0, "reward": 0.0, "reward_std": 0.6755422949790955, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07898029709678793, "rewards/wordcountpos_reward/raw_geo/std": 0.12465419440453065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1186.875, "completions/mean_terminated_length": 1166.0, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.34546909381876373, "frac_reward_zero_std": 0.0, "grad_norm": 3.3464979128691312, "kl": 0.02032470703125, "learning_rate": 8.449247369147143e-07, "loss": -0.0064, "num_tokens": 75132509.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4986012578010559, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06003935317517118, "rewards/wordcountpos_reward/raw_geo/std": 0.15599631206994707, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1242.125, "completions/mean_terminated_length": 1224.933349609375, "completions/min_length": 1038.0, "completions/min_terminated_length": 1038.0, "epoch": 0.34566913382676534, "frac_reward_zero_std": 0.0, "grad_norm": 3.1969694187826385, "kl": 0.018463134765625, "learning_rate": 8.446873302753783e-07, "loss": 0.0129, "num_tokens": 75170847.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9715031385421753, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04690511093353454, "rewards/wordcountpos_reward/raw_geo/std": 0.1236068764265948, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 969.3125, "completions/mean_terminated_length": 969.3125, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.34586917383476695, "frac_reward_zero_std": 0.0, "grad_norm": 3.797458236038755, "kl": 0.0167999267578125, "learning_rate": 8.44449779945162e-07, "loss": 0.0034, "num_tokens": 75220660.0, "reward": -2.9802322387695312e-08, "reward_std": 0.944270133972168, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1341032308323001, "rewards/wordcountpos_reward/raw_geo/std": 0.17118007392543386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1113.0625, "completions/mean_terminated_length": 1113.0625, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.34606921384276856, "frac_reward_zero_std": 0.0, "grad_norm": 2.5753878419553655, "kl": 0.0115203857421875, "learning_rate": 8.442120860398958e-07, "loss": 0.0109, "num_tokens": 75263789.0, "reward": 0.0, "reward_std": 0.9938139915466309, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09399468579210593, "rewards/wordcountpos_reward/raw_geo/std": 0.05059893019944131, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1114.0, "completions/mean_terminated_length": 1114.0, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.3462692538507702, "frac_reward_zero_std": 0.0, "grad_norm": 3.684548990214174, "kl": 0.0160064697265625, "learning_rate": 8.439742486754806e-07, "loss": -0.0714, "num_tokens": 75316861.0, "reward": -1.4901161193847656e-08, "reward_std": 1.032321810722351, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06998370050948025, "rewards/wordcountpos_reward/raw_geo/std": 0.06990183708693114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1123.75, "completions/mean_terminated_length": 998.3333740234375, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.34646929385877173, "frac_reward_zero_std": 0.0, "grad_norm": 3.809040623114236, "kl": 0.017364501953125, "learning_rate": 8.437362679678868e-07, "loss": 0.0489, "num_tokens": 75356681.0, "reward": 7.450580596923828e-09, "reward_std": 1.0470911264419556, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08040011000038194, "rewards/wordcountpos_reward/raw_geo/std": 0.05801351273338138, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1085.0, "completions/mean_terminated_length": 1057.3333740234375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.34666933386677334, "frac_reward_zero_std": 0.0, "grad_norm": 2.7856413334773475, "kl": 0.012847900390625, "learning_rate": 8.434981440331549e-07, "loss": -0.0433, "num_tokens": 75397129.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0442838668823242, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12539632860729288, "rewards/wordcountpos_reward/raw_geo/std": 0.09553661606522137, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1152.25, "completions/mean_terminated_length": 1152.25, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.34686937387477496, "frac_reward_zero_std": 0.0, "grad_norm": 2.730180641953594, "kl": 0.01031494140625, "learning_rate": 8.432598769873952e-07, "loss": 0.0303, "num_tokens": 75445885.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8651590347290039, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09145136700843742, "rewards/wordcountpos_reward/raw_geo/std": 0.060933200675037755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1246.0625, "completions/mean_terminated_length": 1229.1334228515625, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.34706941388277657, "frac_reward_zero_std": 0.0, "grad_norm": 2.7880250306512147, "kl": 0.012237548828125, "learning_rate": 8.430214669467879e-07, "loss": 0.0286, "num_tokens": 75488278.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8757525682449341, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009141063894497122, "rewards/wordcountpos_reward/raw_geo/std": 0.13725018844144185, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1180.1875, "completions/mean_terminated_length": 1134.5, "completions/min_length": 954.0, "completions/min_terminated_length": 954.0, "epoch": 0.3472694538907782, "frac_reward_zero_std": 0.0, "grad_norm": 3.1860516228123426, "kl": 0.0158538818359375, "learning_rate": 8.427829140275826e-07, "loss": -0.0216, "num_tokens": 75534937.0, "reward": 0.0, "reward_std": 0.997195839881897, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1130510860792016, "rewards/wordcountpos_reward/raw_geo/std": 0.16753564914419058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1174.0, "completions/max_terminated_length": 1174.0, "completions/mean_length": 987.3125, "completions/mean_terminated_length": 987.3125, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.34746949389877974, "frac_reward_zero_std": 0.0, "grad_norm": 2.32425010820382, "kl": 0.00731658935546875, "learning_rate": 8.42544218346099e-07, "loss": -0.0094, "num_tokens": 75575374.0, "reward": 2.9802322387695312e-08, "reward_std": 0.439910352230072, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17680958607407699, "rewards/wordcountpos_reward/raw_geo/std": 0.21620985673016077, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1290.3125, "completions/mean_terminated_length": 1276.3333740234375, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.34766953390678135, "frac_reward_zero_std": 0.0, "grad_norm": 2.5325818624180876, "kl": 0.0121612548828125, "learning_rate": 8.423053800187261e-07, "loss": -0.019, "num_tokens": 75622355.0, "reward": 0.0, "reward_std": 1.029433250427246, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2888202847905007, "rewards/wordcountpos_reward/raw_geo/std": 0.1869594518592459, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1201.875, "completions/mean_terminated_length": 1201.875, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.34786957391478296, "frac_reward_zero_std": 0.0, "grad_norm": 3.4965371075446217, "kl": 0.0186920166015625, "learning_rate": 8.420663991619226e-07, "loss": 0.0226, "num_tokens": 75674473.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8095927238464355, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.002478753896322062, "rewards/wordcountpos_reward/raw_geo/std": 0.09118875143142491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1116.125, "completions/mean_terminated_length": 1116.125, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.34806961392278457, "frac_reward_zero_std": 0.0, "grad_norm": 2.909622471583735, "kl": 0.0126800537109375, "learning_rate": 8.418272758922167e-07, "loss": 0.012, "num_tokens": 75721403.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9980233907699585, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07849789942027535, "rewards/wordcountpos_reward/raw_geo/std": 0.07002551387853127, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1089.0, "completions/mean_length": 1268.5, "completions/mean_terminated_length": 1037.0, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.3482696539307862, "frac_reward_zero_std": 0.0, "grad_norm": 2.207014297202443, "kl": 0.008941650390625, "learning_rate": 8.415880103262059e-07, "loss": -0.0017, "num_tokens": 75776411.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0572911500930786, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015331305535608606, "rewards/wordcountpos_reward/raw_geo/std": 0.07852050800000888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382574, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1182.3125, "completions/mean_terminated_length": 1182.3125, "completions/min_length": 1097.0, "completions/min_terminated_length": 1097.0, "epoch": 0.34846969393878774, "frac_reward_zero_std": 0.0, "grad_norm": 1.6067144401784816, "kl": 0.00481414794921875, "learning_rate": 8.41348602580557e-07, "loss": -0.0003, "num_tokens": 75813456.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9599651098251343, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12137200283545012, "rewards/wordcountpos_reward/raw_geo/std": 0.07096277819320206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1216.375, "completions/mean_terminated_length": 1197.4666748046875, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.34866973394678935, "frac_reward_zero_std": 0.0, "grad_norm": 2.9578251346374342, "kl": 0.01739501953125, "learning_rate": 8.411090527720066e-07, "loss": -0.0317, "num_tokens": 75852774.0, "reward": -7.450580596923828e-09, "reward_std": 1.0665870904922485, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.019445109283162974, "rewards/wordcountpos_reward/raw_geo/std": 0.10738324269923563, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.14089659985908765, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1338.875, "completions/mean_terminated_length": 1265.6363525390625, "completions/min_length": 1099.0, "completions/min_terminated_length": 1099.0, "epoch": 0.34886977395479096, "frac_reward_zero_std": 0.0, "grad_norm": 3.194055635114125, "kl": 0.0172119140625, "learning_rate": 8.408693610173603e-07, "loss": -0.0052, "num_tokens": 75899060.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9547780752182007, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2261594942519671, "rewards/wordcountpos_reward/raw_geo/std": 0.10865201701639123, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752093, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1015.9375, "completions/mean_terminated_length": 1015.9375, "completions/min_length": 630.0, "completions/min_terminated_length": 630.0, "epoch": 0.3490698139627926, "frac_reward_zero_std": 0.0, "grad_norm": 3.906406807819015, "kl": 0.0156402587890625, "learning_rate": 8.406295274334926e-07, "loss": 0.0393, "num_tokens": 75944059.0, "reward": 0.0, "reward_std": 0.9560251235961914, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11774425628935953, "rewards/wordcountpos_reward/raw_geo/std": 0.06312923728325064, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.054262735320332364, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1147.0, "completions/mean_terminated_length": 1096.571533203125, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.3492698539707942, "frac_reward_zero_std": 0.0, "grad_norm": 3.2051816922772183, "kl": 0.0120849609375, "learning_rate": 8.403895521373476e-07, "loss": 0.0615, "num_tokens": 75978387.0, "reward": 0.0, "reward_std": 0.9096341133117676, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21622720739252863, "rewards/wordcountpos_reward/raw_geo/std": 0.13055362001242565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1178.8125, "completions/mean_terminated_length": 1104.6923828125, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.34946989397879574, "frac_reward_zero_std": 0.0, "grad_norm": 3.176936657223504, "kl": 0.014678955078125, "learning_rate": 8.401494352459384e-07, "loss": 0.0126, "num_tokens": 76021520.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0628066062927246, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12319832703928207, "rewards/wordcountpos_reward/raw_geo/std": 0.15112797179986387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1080.125, "completions/mean_terminated_length": 1080.125, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.34966993398679735, "frac_reward_zero_std": 0.0, "grad_norm": 3.423460930142355, "kl": 0.0159759521484375, "learning_rate": 8.39909176876347e-07, "loss": 0.0112, "num_tokens": 76071522.0, "reward": 0.0, "reward_std": 0.6338709592819214, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.169872426761819, "rewards/wordcountpos_reward/raw_geo/std": 0.1506294649817365, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1200.875, "completions/mean_terminated_length": 1180.933349609375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.34986997399479897, "frac_reward_zero_std": 0.0, "grad_norm": 3.1314582897293115, "kl": 0.0126495361328125, "learning_rate": 8.396687771457245e-07, "loss": -0.0207, "num_tokens": 76106608.0, "reward": 0.0, "reward_std": 0.8720186948776245, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1744394013619878, "rewards/wordcountpos_reward/raw_geo/std": 0.12667434068599262, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1083.25, "completions/mean_terminated_length": 1083.25, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.3500700140028006, "frac_reward_zero_std": 0.0, "grad_norm": 3.1063834124273377, "kl": 0.01104736328125, "learning_rate": 8.39428236171291e-07, "loss": 0.0088, "num_tokens": 76139332.0, "reward": 0.0, "reward_std": 1.0650572776794434, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0967017304476083, "rewards/wordcountpos_reward/raw_geo/std": 0.3101797621178311, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1235.875, "completions/mean_terminated_length": 1147.8333740234375, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.35027005401080213, "frac_reward_zero_std": 0.0, "grad_norm": 3.31142509435829, "kl": 0.0143280029296875, "learning_rate": 8.391875540703353e-07, "loss": -0.007, "num_tokens": 76189370.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8450350165367126, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07960390048839829, "rewards/wordcountpos_reward/raw_geo/std": 0.16827951928991716, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 912.75, "completions/mean_terminated_length": 912.75, "completions/min_length": 586.0, "completions/min_terminated_length": 586.0, "epoch": 0.35047009401880375, "frac_reward_zero_std": 0.0, "grad_norm": 3.019101860287366, "kl": 0.0133209228515625, "learning_rate": 8.389467309602149e-07, "loss": -0.0182, "num_tokens": 76219222.0, "reward": 0.0, "reward_std": 1.0596234798431396, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1749562641479956, "rewards/wordcountpos_reward/raw_geo/std": 0.07395599709580569, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1203.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 909.5625, "completions/mean_terminated_length": 909.5625, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.35067013402680536, "frac_reward_zero_std": 0.0, "grad_norm": 3.442055777582219, "kl": 0.013916015625, "learning_rate": 8.387057669583564e-07, "loss": -0.0156, "num_tokens": 76251567.0, "reward": 0.0, "reward_std": 0.8959325551986694, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.058975670117623454, "rewards/wordcountpos_reward/raw_geo/std": 0.09082896561303558, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1100.875, "completions/mean_terminated_length": 1074.2667236328125, "completions/min_length": 711.0, "completions/min_terminated_length": 711.0, "epoch": 0.35087017403480697, "frac_reward_zero_std": 0.0, "grad_norm": 3.656090652636485, "kl": 0.017181396484375, "learning_rate": 8.38464662182255e-07, "loss": 0.0486, "num_tokens": 76302085.0, "reward": 0.0, "reward_std": 0.8695361018180847, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.008367596152448373, "rewards/wordcountpos_reward/raw_geo/std": 0.044893141799502344, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620105, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1175.625, "completions/mean_terminated_length": 1154.0, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.3510702140428086, "frac_reward_zero_std": 0.0, "grad_norm": 3.0395418994086687, "kl": 0.0147247314453125, "learning_rate": 8.382234167494747e-07, "loss": 0.0273, "num_tokens": 76343591.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0580565929412842, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.23148631681277232, "rewards/wordcountpos_reward/raw_geo/std": 0.2582161452010852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1454.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1021.5, "completions/mean_terminated_length": 1021.5, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.35127025405081014, "frac_reward_zero_std": 0.0, "grad_norm": 3.3850257953326577, "kl": 0.01971435546875, "learning_rate": 8.379820307776472e-07, "loss": -0.0399, "num_tokens": 76385975.0, "reward": 0.0, "reward_std": 0.8627417087554932, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1063942083960841, "rewards/wordcountpos_reward/raw_geo/std": 0.11656831947156639, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1046.0, "completions/max_terminated_length": 1046.0, "completions/mean_length": 898.75, "completions/mean_terminated_length": 898.75, "completions/min_length": 719.0, "completions/min_terminated_length": 719.0, "epoch": 0.35147029405881175, "frac_reward_zero_std": 0.0, "grad_norm": 3.744633595515264, "kl": 0.0173492431640625, "learning_rate": 8.377405043844739e-07, "loss": -0.0064, "num_tokens": 76425803.0, "reward": 0.0, "reward_std": 0.8662160634994507, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08285859887325972, "rewards/wordcountpos_reward/raw_geo/std": 0.14720409028205564, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1176.75, "completions/mean_terminated_length": 1155.2000732421875, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.35167033406681336, "frac_reward_zero_std": 0.0, "grad_norm": 3.733777188519865, "kl": 0.0174713134765625, "learning_rate": 8.374988376877241e-07, "loss": 0.0106, "num_tokens": 76467143.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9899064302444458, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0010663061631800974, "rewards/wordcountpos_reward/raw_geo/std": 0.09378112727120681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 1083.625, "completions/mean_terminated_length": 1055.86669921875, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.351870374074815, "frac_reward_zero_std": 0.0, "grad_norm": 3.004270012403573, "kl": 0.01458740234375, "learning_rate": 8.372570308052356e-07, "loss": -0.0076, "num_tokens": 76496665.0, "reward": 0.0, "reward_std": 0.5205124616622925, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0835022272872219, "rewards/wordcountpos_reward/raw_geo/std": 0.10942189775086408, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1261.625, "completions/mean_terminated_length": 1206.615478515625, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.3520704140828166, "frac_reward_zero_std": 0.0, "grad_norm": 3.37723527024958, "kl": 0.016510009765625, "learning_rate": 8.370150838549143e-07, "loss": -0.0145, "num_tokens": 76548051.0, "reward": 0.0, "reward_std": 0.6273642778396606, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.029543889821512806, "rewards/wordcountpos_reward/raw_geo/std": 0.2024231662227156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1085.9375, "completions/mean_terminated_length": 990.3846435546875, "completions/min_length": 570.0, "completions/min_terminated_length": 570.0, "epoch": 0.35227045409081814, "frac_reward_zero_std": 0.0, "grad_norm": 2.3739715401346904, "kl": 0.0108642578125, "learning_rate": 8.36772996954735e-07, "loss": -0.0173, "num_tokens": 76590218.0, "reward": 0.0, "reward_std": 1.0399260520935059, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01047343397234857, "rewards/wordcountpos_reward/raw_geo/std": 0.09854378166513973, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1214.4375, "completions/mean_terminated_length": 1214.4375, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.35247049409881975, "frac_reward_zero_std": 0.0, "grad_norm": 3.168514414031809, "kl": 0.0147857666015625, "learning_rate": 8.365307702227402e-07, "loss": 0.0172, "num_tokens": 76629673.0, "reward": 0.0, "reward_std": 0.8143726587295532, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03433872607048457, "rewards/wordcountpos_reward/raw_geo/std": 0.1034153093498973, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1346.0, "completions/max_terminated_length": 1346.0, "completions/mean_length": 946.4375, "completions/mean_terminated_length": 946.4375, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.35267053410682137, "frac_reward_zero_std": 0.0, "grad_norm": 3.32028351732439, "kl": 0.0130462646484375, "learning_rate": 8.362884037770406e-07, "loss": 0.0016, "num_tokens": 76671632.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0393953323364258, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10284659422672278, "rewards/wordcountpos_reward/raw_geo/std": 0.10992351338458453, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1210.125, "completions/mean_terminated_length": 1168.71435546875, "completions/min_length": 726.0, "completions/min_terminated_length": 726.0, "epoch": 0.352870574114823, "frac_reward_zero_std": 0.0, "grad_norm": 2.9190152704108017, "kl": 0.012481689453125, "learning_rate": 8.360458977358153e-07, "loss": -0.0071, "num_tokens": 76719610.0, "reward": 0.0, "reward_std": 0.9037257432937622, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23087200788901413, "rewards/wordcountpos_reward/raw_geo/std": 0.08185484576012153, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1132.0625, "completions/mean_terminated_length": 845.888916015625, "completions/min_length": 644.0, "completions/min_terminated_length": 644.0, "epoch": 0.3530706141228246, "frac_reward_zero_std": 0.0, "grad_norm": 2.996926037574471, "kl": 0.012237548828125, "learning_rate": 8.358032522173114e-07, "loss": 0.002, "num_tokens": 76763067.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0625553131103516, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10718183920250576, "rewards/wordcountpos_reward/raw_geo/std": 0.09454480939161107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1141.125, "completions/mean_terminated_length": 1141.125, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.35327065413082614, "frac_reward_zero_std": 0.0, "grad_norm": 3.4301525780618216, "kl": 0.017669677734375, "learning_rate": 8.35560467339844e-07, "loss": 0.0086, "num_tokens": 76816261.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6547678709030151, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09361613549072692, "rewards/wordcountpos_reward/raw_geo/std": 0.07929776414886555, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1022.0, "completions/mean_terminated_length": 1022.0, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.35347069413882776, "frac_reward_zero_std": 0.0, "grad_norm": 3.5552046928640513, "kl": 0.019317626953125, "learning_rate": 8.353175432217959e-07, "loss": -0.0296, "num_tokens": 76857301.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8594515323638916, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09115010401761779, "rewards/wordcountpos_reward/raw_geo/std": 0.06978551704640325, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1137.8125, "completions/mean_terminated_length": 1113.666748046875, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.35367073414682937, "frac_reward_zero_std": 0.0, "grad_norm": 3.5134054589999293, "kl": 0.017913818359375, "learning_rate": 8.35074479981618e-07, "loss": 0.0267, "num_tokens": 76908578.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6233526468276978, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14070246879262074, "rewards/wordcountpos_reward/raw_geo/std": 0.13427832917434657, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1143.3125, "completions/mean_terminated_length": 1119.533447265625, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.353870774154831, "frac_reward_zero_std": 0.0, "grad_norm": 3.4394738247277834, "kl": 0.0155487060546875, "learning_rate": 8.348312777378293e-07, "loss": 0.0055, "num_tokens": 76952119.0, "reward": 0.0, "reward_std": 0.3432086110115051, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025926713601043035, "rewards/wordcountpos_reward/raw_geo/std": 0.18160445002803968, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1273.0, "completions/max_terminated_length": 1273.0, "completions/mean_length": 1018.5625, "completions/mean_terminated_length": 1018.5625, "completions/min_length": 710.0, "completions/min_terminated_length": 710.0, "epoch": 0.3540708141628326, "frac_reward_zero_std": 0.0, "grad_norm": 3.165086856832091, "kl": 0.0156707763671875, "learning_rate": 8.345879366090164e-07, "loss": 0.0321, "num_tokens": 76999672.0, "reward": 0.0, "reward_std": 1.0659531354904175, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02539357552940831, "rewards/wordcountpos_reward/raw_geo/std": 0.233092306338323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1307.625, "completions/mean_terminated_length": 1158.0, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.35427085417083415, "frac_reward_zero_std": 0.0, "grad_norm": 3.1976807096972037, "kl": 0.0146484375, "learning_rate": 8.343444567138331e-07, "loss": -0.059, "num_tokens": 77053842.0, "reward": 2.9802322387695312e-08, "reward_std": 0.845935046672821, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03995322137849134, "rewards/wordcountpos_reward/raw_geo/std": 0.16344018957257903, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1249.0625, "completions/mean_terminated_length": 1191.1539306640625, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.35447089417883576, "frac_reward_zero_std": 0.0, "grad_norm": 3.0944915475823533, "kl": 0.0135498046875, "learning_rate": 8.341008381710015e-07, "loss": -0.0275, "num_tokens": 77107307.0, "reward": -7.450580596923828e-09, "reward_std": 1.0531097650527954, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.17963238129819156, "rewards/wordcountpos_reward/raw_geo/std": 0.08655035043001716, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353542, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1060.0, "completions/mean_terminated_length": 1030.666748046875, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.35467093418683737, "frac_reward_zero_std": 0.0, "grad_norm": 2.244421068701371, "kl": 0.006877899169921875, "learning_rate": 8.338570810993111e-07, "loss": -0.0262, "num_tokens": 77146227.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0150959491729736, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18955963082929525, "rewards/wordcountpos_reward/raw_geo/std": 0.13371337517066578, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1044.375, "completions/mean_terminated_length": 1044.375, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.354870974194839, "frac_reward_zero_std": 0.0, "grad_norm": 3.2089717010263716, "kl": 0.0108642578125, "learning_rate": 8.336131856176192e-07, "loss": -0.0224, "num_tokens": 77179665.0, "reward": 0.0, "reward_std": 0.7413496971130371, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17749997584678434, "rewards/wordcountpos_reward/raw_geo/std": 0.08729281042438872, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1251.75, "completions/mean_terminated_length": 1235.2000732421875, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.3550710142028406, "frac_reward_zero_std": 0.0, "grad_norm": 2.757737208480071, "kl": 0.0124053955078125, "learning_rate": 8.3336915184485e-07, "loss": -0.0491, "num_tokens": 77228437.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5085335373878479, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05196947074689537, "rewards/wordcountpos_reward/raw_geo/std": 0.12807978933516145, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.17888543819998318, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1164.375, "completions/mean_terminated_length": 1164.375, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.35527105421084215, "frac_reward_zero_std": 0.0, "grad_norm": 2.1112946781110837, "kl": 0.0085906982421875, "learning_rate": 8.331249798999957e-07, "loss": -0.0377, "num_tokens": 77280243.0, "reward": 0.0, "reward_std": 0.8375033140182495, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08948196588720646, "rewards/wordcountpos_reward/raw_geo/std": 0.10307202962667876, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1127.4375, "completions/mean_terminated_length": 903.9000244140625, "completions/min_length": 639.0, "completions/min_terminated_length": 639.0, "epoch": 0.35547109421884376, "frac_reward_zero_std": 0.0, "grad_norm": 3.0071592494705013, "kl": 0.011260986328125, "learning_rate": 8.328806699021155e-07, "loss": -0.039, "num_tokens": 77323906.0, "reward": 3.725290298461914e-09, "reward_std": 1.045759916305542, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.062323154744563236, "rewards/wordcountpos_reward/raw_geo/std": 0.07456624149110516, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1102.9375, "completions/mean_terminated_length": 1102.9375, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.3556711342268454, "frac_reward_zero_std": 0.0, "grad_norm": 3.4477285194911587, "kl": 0.0173187255859375, "learning_rate": 8.32636221970336e-07, "loss": 0.0096, "num_tokens": 77362897.0, "reward": 0.0, "reward_std": 0.6526699066162109, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18625682606729063, "rewards/wordcountpos_reward/raw_geo/std": 0.2808035713316073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 954.8125, "completions/mean_terminated_length": 954.8125, "completions/min_length": 520.0, "completions/min_terminated_length": 520.0, "epoch": 0.355871174234847, "frac_reward_zero_std": 0.0, "grad_norm": 3.262044057617541, "kl": 0.0113677978515625, "learning_rate": 8.323916362238514e-07, "loss": -0.0185, "num_tokens": 77401254.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8943012356758118, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014424672357033647, "rewards/wordcountpos_reward/raw_geo/std": 0.11751598132442093, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 995.5, "completions/mean_terminated_length": 995.5, "completions/min_length": 627.0, "completions/min_terminated_length": 627.0, "epoch": 0.35607121424284854, "frac_reward_zero_std": 0.0, "grad_norm": 3.1634840410757237, "kl": 0.0244293212890625, "learning_rate": 8.321469127819227e-07, "loss": -0.0083, "num_tokens": 77447302.0, "reward": 0.0, "reward_std": 0.6351709365844727, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21175268534125855, "rewards/wordcountpos_reward/raw_geo/std": 0.225294773399291, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1245.1875, "completions/mean_terminated_length": 1092.300048828125, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.35627125425085016, "frac_reward_zero_std": 0.0, "grad_norm": 3.1426916488304095, "kl": 0.0146331787109375, "learning_rate": 8.31902051763878e-07, "loss": -0.0137, "num_tokens": 77491857.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9163140058517456, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015321193783817896, "rewards/wordcountpos_reward/raw_geo/std": 0.05993491944938508, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1039.0625, "completions/mean_terminated_length": 1039.0625, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.35647129425885177, "frac_reward_zero_std": 0.0, "grad_norm": 3.3318399238376086, "kl": 0.01934814453125, "learning_rate": 8.316570532891128e-07, "loss": -0.0687, "num_tokens": 77528714.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9354727268218994, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08987492245923824, "rewards/wordcountpos_reward/raw_geo/std": 0.07136254523261959, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1071.75, "completions/mean_terminated_length": 1071.75, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.3566713342668534, "frac_reward_zero_std": 0.0, "grad_norm": 3.455631229743058, "kl": 0.017608642578125, "learning_rate": 8.314119174770893e-07, "loss": -0.0208, "num_tokens": 77570486.0, "reward": 0.0, "reward_std": 0.8518043756484985, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04871386234198273, "rewards/wordcountpos_reward/raw_geo/std": 0.10190213861526806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1085.8125, "completions/mean_terminated_length": 1085.8125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.356871374274855, "frac_reward_zero_std": 0.0, "grad_norm": 3.4100870022398997, "kl": 0.019775390625, "learning_rate": 8.311666444473372e-07, "loss": 0.0125, "num_tokens": 77614355.0, "reward": -4.0978193283081055e-08, "reward_std": 0.8546255826950073, "rewards/wordcountpos_reward/mean": -4.0978193283081055e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.059011478210088786, "rewards/wordcountpos_reward/raw_geo/std": 0.0808333892516301, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 971.8125, "completions/mean_terminated_length": 971.8125, "completions/min_length": 668.0, "completions/min_terminated_length": 668.0, "epoch": 0.35707141428285655, "frac_reward_zero_std": 0.0, "grad_norm": 3.3795069928788344, "kl": 0.0184173583984375, "learning_rate": 8.309212343194524e-07, "loss": 0.0101, "num_tokens": 77654272.0, "reward": 2.9802322387695312e-08, "reward_std": 0.41825148463249207, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.20585509560751594, "rewards/wordcountpos_reward/raw_geo/std": 0.31986154025481617, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 1041.4375, "completions/mean_terminated_length": 1041.4375, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.35727145429085816, "frac_reward_zero_std": 0.0, "grad_norm": 2.9447435996897733, "kl": 0.01152801513671875, "learning_rate": 8.306756872130981e-07, "loss": 0.0089, "num_tokens": 77695927.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0381940603256226, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03888574278295472, "rewards/wordcountpos_reward/raw_geo/std": 0.14458618401927192, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1344.9375, "completions/mean_terminated_length": 1189.875, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.35747149429885977, "frac_reward_zero_std": 0.0, "grad_norm": 2.8578058942876385, "kl": 0.0134735107421875, "learning_rate": 8.304300032480043e-07, "loss": -0.0646, "num_tokens": 77751822.0, "reward": 0.0, "reward_std": 0.8696860074996948, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.20234087789731361, "rewards/wordcountpos_reward/raw_geo/std": 0.2671582198031056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1322.4375, "completions/mean_terminated_length": 1310.60009765625, "completions/min_length": 1192.0, "completions/min_terminated_length": 1192.0, "epoch": 0.3576715343068614, "frac_reward_zero_std": 0.0, "grad_norm": 3.0353406386246156, "kl": 0.0142364501953125, "learning_rate": 8.301841825439674e-07, "loss": 0.0295, "num_tokens": 77803645.0, "reward": 0.0, "reward_std": 0.786299467086792, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03207476653092414, "rewards/wordcountpos_reward/raw_geo/std": 0.14288910554864873, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1097.5625, "completions/mean_terminated_length": 1070.7333984375, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.357871574314863, "frac_reward_zero_std": 0.0, "grad_norm": 3.227660610138832, "kl": 0.0164642333984375, "learning_rate": 8.299382252208508e-07, "loss": 0.0033, "num_tokens": 77855214.0, "reward": -4.470348358154297e-08, "reward_std": 0.9033418297767639, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.036960299769436405, "rewards/wordcountpos_reward/raw_geo/std": 0.06914520166885942, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125757, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 1052.1875, "completions/mean_terminated_length": 1052.1875, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.35807161432286455, "frac_reward_zero_std": 0.0, "grad_norm": 3.5242901139041134, "kl": 0.0148773193359375, "learning_rate": 8.296921313985845e-07, "loss": -0.0139, "num_tokens": 77892489.0, "reward": 0.0, "reward_std": 0.9171948432922363, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06865091122028667, "rewards/wordcountpos_reward/raw_geo/std": 0.03947434604465728, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0825518916489187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1130.3125, "completions/mean_terminated_length": 1130.3125, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.35827165433086616, "frac_reward_zero_std": 0.0, "grad_norm": 3.290439707470091, "kl": 0.0160369873046875, "learning_rate": 8.294459011971648e-07, "loss": -0.0106, "num_tokens": 77936766.0, "reward": 3.725290298461914e-08, "reward_std": 1.065662145614624, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21022650979966945, "rewards/wordcountpos_reward/raw_geo/std": 0.06940481212345954, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1200.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 1016.0, "completions/mean_terminated_length": 1016.0, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.3584716943388678, "frac_reward_zero_std": 0.0, "grad_norm": 3.1474353577263376, "kl": 0.014129638671875, "learning_rate": 8.291995347366549e-07, "loss": -0.0008, "num_tokens": 77982502.0, "reward": 4.470348358154297e-08, "reward_std": 0.9738003611564636, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026166755509749413, "rewards/wordcountpos_reward/raw_geo/std": 0.05185383738303213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1147.8125, "completions/mean_terminated_length": 1147.8125, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.3586717343468694, "frac_reward_zero_std": 0.0, "grad_norm": 3.4636368111927873, "kl": 0.021484375, "learning_rate": 8.289530321371838e-07, "loss": -0.0086, "num_tokens": 78034843.0, "reward": 2.60770320892334e-08, "reward_std": 0.9589567184448242, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.017555790945825253, "rewards/wordcountpos_reward/raw_geo/std": 0.0765506395894115, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1136.5, "completions/mean_terminated_length": 1084.571533203125, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.358871774354871, "frac_reward_zero_std": 0.0, "grad_norm": 3.478633424768702, "kl": 0.01849365234375, "learning_rate": 8.287063935189477e-07, "loss": -0.0639, "num_tokens": 78086011.0, "reward": 0.0, "reward_std": 0.4624847173690796, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025231276556521638, "rewards/wordcountpos_reward/raw_geo/std": 0.09838948488047025, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1192.5, "completions/mean_terminated_length": 1172.0001220703125, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.35907181436287255, "frac_reward_zero_std": 0.0, "grad_norm": 3.1218685952297087, "kl": 0.0150299072265625, "learning_rate": 8.284596190022084e-07, "loss": -0.007, "num_tokens": 78128323.0, "reward": -3.725290298461914e-09, "reward_std": 0.9593905210494995, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.10021236115452767, "rewards/wordcountpos_reward/raw_geo/std": 0.24870041245817823, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1211.75, "completions/mean_terminated_length": 1192.533447265625, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.35927185437087417, "frac_reward_zero_std": 0.0, "grad_norm": 2.741819628650346, "kl": 0.0112457275390625, "learning_rate": 8.282127087072945e-07, "loss": 0.0092, "num_tokens": 78176863.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9650435447692871, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016194140196833164, "rewards/wordcountpos_reward/raw_geo/std": 0.06246765301296056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1198.625, "completions/mean_terminated_length": 1098.166748046875, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.3594718943788758, "frac_reward_zero_std": 0.0, "grad_norm": 2.658873597014003, "kl": 0.0145416259765625, "learning_rate": 8.279656627546006e-07, "loss": 0.0426, "num_tokens": 78224145.0, "reward": 0.0, "reward_std": 0.9768966436386108, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11823837943433536, "rewards/wordcountpos_reward/raw_geo/std": 0.16374966888855674, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1127.0, "completions/max_terminated_length": 1127.0, "completions/mean_length": 879.4375, "completions/mean_terminated_length": 879.4375, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.3596719343868774, "frac_reward_zero_std": 0.0, "grad_norm": 3.4756289237640314, "kl": 0.0147857666015625, "learning_rate": 8.277184812645872e-07, "loss": -0.0306, "num_tokens": 78251392.0, "reward": 0.0, "reward_std": 0.8145972490310669, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07989463934851469, "rewards/wordcountpos_reward/raw_geo/std": 0.10843930532673866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1094.5625, "completions/mean_terminated_length": 1094.5625, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.359871974394879, "frac_reward_zero_std": 0.0, "grad_norm": 1.50562663949324, "kl": 0.005107879638671875, "learning_rate": 8.274711643577812e-07, "loss": -0.014, "num_tokens": 78286289.0, "reward": 0.0, "reward_std": 0.5736739039421082, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.004131526745284645, "rewards/wordcountpos_reward/raw_geo/std": 0.05687678880133583, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.13326387079497304, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 980.9375, "completions/mean_terminated_length": 980.9375, "completions/min_length": 734.0, "completions/min_terminated_length": 734.0, "epoch": 0.36007201440288056, "frac_reward_zero_std": 0.0, "grad_norm": 2.0084882948467606, "kl": 0.004909515380859375, "learning_rate": 8.272237121547755e-07, "loss": -0.0221, "num_tokens": 78319024.0, "reward": 0.0, "reward_std": 0.6539640426635742, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06237594292814795, "rewards/wordcountpos_reward/raw_geo/std": 0.09414410926475156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1058.625, "completions/mean_terminated_length": 1058.625, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.36027205441088217, "frac_reward_zero_std": 0.0, "grad_norm": 3.7825485336227467, "kl": 0.01611328125, "learning_rate": 8.26976124776229e-07, "loss": -0.0002, "num_tokens": 78355426.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9959114789962769, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014423863406516265, "rewards/wordcountpos_reward/raw_geo/std": 0.06680053221513364, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1365.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 986.5, "completions/mean_terminated_length": 986.5, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.3604720944188838, "frac_reward_zero_std": 0.0, "grad_norm": 3.48257815338035, "kl": 0.016845703125, "learning_rate": 8.267284023428663e-07, "loss": -0.0355, "num_tokens": 78398442.0, "reward": 0.0, "reward_std": 0.9134250283241272, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.016927365041500324, "rewards/wordcountpos_reward/raw_geo/std": 0.04207980325940796, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1116.375, "completions/mean_terminated_length": 1116.375, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.3606721344268854, "frac_reward_zero_std": 0.0, "grad_norm": 2.2265140921951354, "kl": 0.0116119384765625, "learning_rate": 8.264805449754781e-07, "loss": -0.0177, "num_tokens": 78449560.0, "reward": -4.470348358154297e-08, "reward_std": 1.0288808345794678, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14095206227850182, "rewards/wordcountpos_reward/raw_geo/std": 0.09311703185119931, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1177.0625, "completions/mean_terminated_length": 1177.0625, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.36087217443488695, "frac_reward_zero_std": 0.0, "grad_norm": 2.949699527487144, "kl": 0.01171112060546875, "learning_rate": 8.262325527949206e-07, "loss": 0.0258, "num_tokens": 78494049.0, "reward": 0.0, "reward_std": 0.7659751772880554, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.006762738395120067, "rewards/wordcountpos_reward/raw_geo/std": 0.08085406769676408, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1038.875, "completions/mean_terminated_length": 1038.875, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.36107221444288856, "frac_reward_zero_std": 0.0, "grad_norm": 3.495448379744296, "kl": 0.01702880859375, "learning_rate": 8.25984425922116e-07, "loss": 0.012, "num_tokens": 78534271.0, "reward": 0.0, "reward_std": 0.8465592861175537, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05835433192742853, "rewards/wordcountpos_reward/raw_geo/std": 0.09148151953920727, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1292.0, "completions/mean_terminated_length": 1167.2000732421875, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.3612722544508902, "frac_reward_zero_std": 0.0, "grad_norm": 3.21159157079306, "kl": 0.0166778564453125, "learning_rate": 8.257361644780519e-07, "loss": -0.0547, "num_tokens": 78577151.0, "reward": 3.725290298461914e-08, "reward_std": 0.9943972826004028, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24471950128552003, "rewards/wordcountpos_reward/raw_geo/std": 0.2686332432030244, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1212.5, "completions/mean_terminated_length": 1171.4285888671875, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.3614722944588918, "frac_reward_zero_std": 0.0, "grad_norm": 2.9356515905635083, "kl": 0.0156707763671875, "learning_rate": 8.254877685837824e-07, "loss": -0.0322, "num_tokens": 78622559.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0484960079193115, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00981040337832653, "rewards/wordcountpos_reward/raw_geo/std": 0.05010981423305183, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1090.75, "completions/mean_terminated_length": 1063.4666748046875, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.3616723344668934, "frac_reward_zero_std": 0.0, "grad_norm": 3.1252609672388645, "kl": 0.01495361328125, "learning_rate": 8.252392383604255e-07, "loss": 0.0216, "num_tokens": 78663371.0, "reward": -3.725290298461914e-08, "reward_std": 0.9002479314804077, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07777724975624825, "rewards/wordcountpos_reward/raw_geo/std": 0.10757979186213981, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 1252.8125, "completions/mean_terminated_length": 1060.5555419921875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.36187237447489495, "frac_reward_zero_std": 0.0, "grad_norm": 2.9499258805514774, "kl": 0.0132904052734375, "learning_rate": 8.249905739291665e-07, "loss": -0.048, "num_tokens": 78721856.0, "reward": 0.0, "reward_std": 0.29172074794769287, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16820539670743995, "rewards/wordcountpos_reward/raw_geo/std": 0.17688862008960812, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1105.375, "completions/mean_terminated_length": 1105.375, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.36207241448289657, "frac_reward_zero_std": 0.0, "grad_norm": 3.302669162572897, "kl": 0.0185546875, "learning_rate": 8.247417754112548e-07, "loss": -0.0221, "num_tokens": 78764430.0, "reward": 0.0, "reward_std": 0.9743021726608276, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.057774432361472575, "rewards/wordcountpos_reward/raw_geo/std": 0.17914478841372436, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 1074.3125, "completions/mean_terminated_length": 1074.3125, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.3622724544908982, "frac_reward_zero_std": 0.0, "grad_norm": 2.8862356222855836, "kl": 0.0129852294921875, "learning_rate": 8.244928429280058e-07, "loss": -0.0118, "num_tokens": 78815435.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0328383445739746, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19936709670928118, "rewards/wordcountpos_reward/raw_geo/std": 0.12374873256768339, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1270.5625, "completions/mean_terminated_length": 1255.2667236328125, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.3624724944988998, "frac_reward_zero_std": 0.0, "grad_norm": 3.362947019605936, "kl": 0.018341064453125, "learning_rate": 8.242437766008001e-07, "loss": -0.003, "num_tokens": 78864652.0, "reward": 0.0, "reward_std": 0.9848069548606873, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2910261059173036, "rewards/wordcountpos_reward/raw_geo/std": 0.4390551779593427, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.1641476300299351, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1398.25, "completions/mean_terminated_length": 1267.4285888671875, "completions/min_length": 1193.0, "completions/min_terminated_length": 1193.0, "epoch": 0.3626725345069014, "frac_reward_zero_std": 0.0, "grad_norm": 2.9282462948694925, "kl": 0.016021728515625, "learning_rate": 8.239945765510837e-07, "loss": -0.0014, "num_tokens": 78921368.0, "reward": 0.0, "reward_std": 1.020251989364624, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10727308133884657, "rewards/wordcountpos_reward/raw_geo/std": 0.07128573848954754, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1007.25, "completions/mean_terminated_length": 1007.25, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.36287257451490296, "frac_reward_zero_std": 0.0, "grad_norm": 3.844677273517912, "kl": 0.021209716796875, "learning_rate": 8.237452429003676e-07, "loss": 0.0591, "num_tokens": 78960388.0, "reward": 0.0, "reward_std": 0.9148995876312256, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.021322126686447004, "rewards/wordcountpos_reward/raw_geo/std": 0.14209340342548074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1054.5625, "completions/mean_terminated_length": 1054.5625, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.36307261452290457, "frac_reward_zero_std": 0.0, "grad_norm": 3.047800812824658, "kl": 0.01251220703125, "learning_rate": 8.23495775770228e-07, "loss": -0.0189, "num_tokens": 78993869.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9362246990203857, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12353954082351107, "rewards/wordcountpos_reward/raw_geo/std": 0.24604683802449492, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1059.25, "completions/mean_terminated_length": 1059.25, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.3632726545309062, "frac_reward_zero_std": 0.0, "grad_norm": 3.5556943495654374, "kl": 0.0140838623046875, "learning_rate": 8.232461752823062e-07, "loss": -0.0575, "num_tokens": 79049849.0, "reward": 0.0, "reward_std": 0.9359241724014282, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0049627015493997366, "rewards/wordcountpos_reward/raw_geo/std": 0.03136479253111927, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1325.75, "completions/mean_terminated_length": 1285.5384521484375, "completions/min_length": 1155.0, "completions/min_terminated_length": 1155.0, "epoch": 0.3634726945389078, "frac_reward_zero_std": 0.0, "grad_norm": 3.0159906307633295, "kl": 0.0191192626953125, "learning_rate": 8.229964415583086e-07, "loss": -0.0058, "num_tokens": 79092245.0, "reward": 5.960464477539063e-08, "reward_std": 0.7740247249603271, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.141742588716008, "rewards/wordcountpos_reward/raw_geo/std": 0.18053920611945395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746353, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1168.8125, "completions/mean_terminated_length": 1168.8125, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.3636727345469094, "frac_reward_zero_std": 0.0, "grad_norm": 3.622809751475295, "kl": 0.01934814453125, "learning_rate": 8.227465747200064e-07, "loss": -0.031, "num_tokens": 79142082.0, "reward": 0.0, "reward_std": 0.9605069160461426, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08405482463646154, "rewards/wordcountpos_reward/raw_geo/std": 0.08096982774467543, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1280.6875, "completions/mean_terminated_length": 1266.0667724609375, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.36387277455491096, "frac_reward_zero_std": 0.0, "grad_norm": 3.0529833266420776, "kl": 0.0132904052734375, "learning_rate": 8.224965748892358e-07, "loss": 0.0162, "num_tokens": 79180189.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0431344509124756, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.26369666091407545, "rewards/wordcountpos_reward/raw_geo/std": 0.3036814403641429, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1320.75, "completions/mean_terminated_length": 1279.3846435546875, "completions/min_length": 1045.0, "completions/min_terminated_length": 1045.0, "epoch": 0.3640728145629126, "frac_reward_zero_std": 0.0, "grad_norm": 3.2941304181565414, "kl": 0.017578125, "learning_rate": 8.222464421878981e-07, "loss": 0.0146, "num_tokens": 79228385.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7098006010055542, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.057948495883413026, "rewards/wordcountpos_reward/raw_geo/std": 0.06234528827688029, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282607, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1274.75, "completions/mean_terminated_length": 1049.5, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.3642728545709142, "frac_reward_zero_std": 0.0, "grad_norm": 3.3409530432648826, "kl": 0.0156707763671875, "learning_rate": 8.219961767379586e-07, "loss": -0.0277, "num_tokens": 79274237.0, "reward": 0.0, "reward_std": 0.9899231791496277, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.057861876067617266, "rewards/wordcountpos_reward/raw_geo/std": 0.14477782639437073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1029.25, "completions/mean_terminated_length": 1029.25, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.3644728945789158, "frac_reward_zero_std": 0.0, "grad_norm": 3.0071070392314367, "kl": 0.019500732421875, "learning_rate": 8.217457786614486e-07, "loss": -0.006, "num_tokens": 79312465.0, "reward": -7.450580596923828e-09, "reward_std": 1.045750617980957, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.14147908353252175, "rewards/wordcountpos_reward/raw_geo/std": 0.09345848530321599, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1204.75, "completions/mean_terminated_length": 1136.615478515625, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.3646729345869174, "frac_reward_zero_std": 0.0, "grad_norm": 3.144147743271278, "kl": 0.016754150390625, "learning_rate": 8.214952480804626e-07, "loss": -0.03, "num_tokens": 79355541.0, "reward": 0.0, "reward_std": 0.9408653974533081, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07703014828798162, "rewards/wordcountpos_reward/raw_geo/std": 0.30097141426609236, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1201.375, "completions/mean_terminated_length": 1065.6363525390625, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.36487297459491896, "frac_reward_zero_std": 0.0, "grad_norm": 2.976861528842559, "kl": 0.0150909423828125, "learning_rate": 8.212445851171611e-07, "loss": -0.0047, "num_tokens": 79409979.0, "reward": -2.9802322387695312e-08, "reward_std": 0.26856812834739685, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10642404463950794, "rewards/wordcountpos_reward/raw_geo/std": 0.24518872089468613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.15371932093796678, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1219.0625, "completions/mean_terminated_length": 1200.3333740234375, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.3650730146029206, "frac_reward_zero_std": 0.0, "grad_norm": 2.87775218369435, "kl": 0.0119781494140625, "learning_rate": 8.20993789893768e-07, "loss": -0.0394, "num_tokens": 79449748.0, "reward": 5.960464477539063e-08, "reward_std": 0.3918062746524811, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12539962576777897, "rewards/wordcountpos_reward/raw_geo/std": 0.1661852518675689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1033.1875, "completions/mean_terminated_length": 1002.0667114257812, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.3652730546109222, "frac_reward_zero_std": 0.0, "grad_norm": 3.0024437396519903, "kl": 0.00919342041015625, "learning_rate": 8.207428625325724e-07, "loss": 0.0113, "num_tokens": 79480151.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8506966829299927, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013722265177571254, "rewards/wordcountpos_reward/raw_geo/std": 0.08267955611830852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1226.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1030.125, "completions/mean_terminated_length": 1030.125, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.3654730946189238, "frac_reward_zero_std": 0.0, "grad_norm": 3.0778783430712706, "kl": 0.0142364501953125, "learning_rate": 8.204918031559278e-07, "loss": -0.0072, "num_tokens": 79514409.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8732375502586365, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10469602008461053, "rewards/wordcountpos_reward/raw_geo/std": 0.17817002967232337, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 902.625, "completions/mean_terminated_length": 902.625, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.3656731346269254, "frac_reward_zero_std": 0.0, "grad_norm": 2.8507718348998043, "kl": 0.01122283935546875, "learning_rate": 8.202406118862515e-07, "loss": -0.049, "num_tokens": 79541803.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9573464393615723, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014775020794371912, "rewards/wordcountpos_reward/raw_geo/std": 0.028827887039568713, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1289.0, "completions/mean_terminated_length": 1240.3077392578125, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.36587317463492697, "frac_reward_zero_std": 0.0, "grad_norm": 3.0884261143976093, "kl": 0.0144500732421875, "learning_rate": 8.199892888460257e-07, "loss": 0.0072, "num_tokens": 79587339.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9736486673355103, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008385610714534766, "rewards/wordcountpos_reward/raw_geo/std": 0.2659185418460851, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1222.9375, "completions/mean_terminated_length": 1056.7000732421875, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.3660732146429286, "frac_reward_zero_std": 0.0, "grad_norm": 3.4423035411222758, "kl": 0.017242431640625, "learning_rate": 8.197378341577969e-07, "loss": 0.0079, "num_tokens": 79633778.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9648916721343994, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21919891557510968, "rewards/wordcountpos_reward/raw_geo/std": 0.2835315590206603, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1077.25, "completions/mean_terminated_length": 1077.25, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.3662732546509302, "frac_reward_zero_std": 0.0, "grad_norm": 3.7603464526984602, "kl": 0.018402099609375, "learning_rate": 8.194862479441751e-07, "loss": -0.0077, "num_tokens": 79673342.0, "reward": 0.0, "reward_std": 0.5395322442054749, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13901132861790835, "rewards/wordcountpos_reward/raw_geo/std": 0.0927639189482725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1032.9375, "completions/mean_terminated_length": 1001.800048828125, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.3664732946589318, "frac_reward_zero_std": 0.0, "grad_norm": 3.772490719780789, "kl": 0.014617919921875, "learning_rate": 8.192345303278351e-07, "loss": 0.0321, "num_tokens": 79724645.0, "reward": 0.0, "reward_std": 1.0520801544189453, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07846940681682796, "rewards/wordcountpos_reward/raw_geo/std": 0.10355564138529583, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.131021626713557, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1011.6875, "completions/mean_terminated_length": 1011.6875, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.36667333466693336, "frac_reward_zero_std": 0.0, "grad_norm": 2.3522727681567503, "kl": 0.00550079345703125, "learning_rate": 8.189826814315157e-07, "loss": -0.0556, "num_tokens": 79771856.0, "reward": 0.0, "reward_std": 0.9150753021240234, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03839500281230674, "rewards/wordcountpos_reward/raw_geo/std": 0.04427347112677234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1365.375, "completions/mean_terminated_length": 1260.6666259765625, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.36687337467493497, "frac_reward_zero_std": 0.0, "grad_norm": 3.061998896130383, "kl": 0.0146942138671875, "learning_rate": 8.187307013780192e-07, "loss": -0.0257, "num_tokens": 79826558.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8393835425376892, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2577978184836686, "rewards/wordcountpos_reward/raw_geo/std": 0.19121356925209101, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1127.125, "completions/mean_terminated_length": 1127.125, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.3670734146829366, "frac_reward_zero_std": 0.0, "grad_norm": 3.4650944330297193, "kl": 0.017059326171875, "learning_rate": 8.184785902902125e-07, "loss": -0.0144, "num_tokens": 79868576.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9694485664367676, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01570201510441444, "rewards/wordcountpos_reward/raw_geo/std": 0.06974948777510649, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1276569477008451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1195.125, "completions/mean_terminated_length": 1174.800048828125, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.3672734546909382, "frac_reward_zero_std": 0.0, "grad_norm": 3.40036586796964, "kl": 0.018310546875, "learning_rate": 8.182263482910263e-07, "loss": 0.0458, "num_tokens": 79920738.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9649240970611572, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19635494759081326, "rewards/wordcountpos_reward/raw_geo/std": 0.1508074744900027, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1084.0625, "completions/mean_terminated_length": 1084.0625, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.3674734946989398, "frac_reward_zero_std": 0.0, "grad_norm": 3.375024906691013, "kl": 0.01593017578125, "learning_rate": 8.179739755034543e-07, "loss": -0.0244, "num_tokens": 79956307.0, "reward": -7.450580596923828e-09, "reward_std": 1.0107815265655518, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.05276250456531001, "rewards/wordcountpos_reward/raw_geo/std": 0.06400813602425351, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1441.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1115.625, "completions/mean_terminated_length": 1115.625, "completions/min_length": 1031.0, "completions/min_terminated_length": 1031.0, "epoch": 0.36767353470694136, "frac_reward_zero_std": 0.0, "grad_norm": 2.6501237195453733, "kl": 0.0118560791015625, "learning_rate": 8.17721472050555e-07, "loss": 0.0298, "num_tokens": 79996685.0, "reward": 2.60770320892334e-08, "reward_std": 1.0687845945358276, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17483110523366335, "rewards/wordcountpos_reward/raw_geo/std": 0.09664237231048073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1324.625, "completions/mean_terminated_length": 1284.1539306640625, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.367873574714943, "frac_reward_zero_std": 0.0, "grad_norm": 2.652782203586809, "kl": 0.0120697021484375, "learning_rate": 8.174688380554505e-07, "loss": -0.0672, "num_tokens": 80049727.0, "reward": 0.0, "reward_std": 0.9376136660575867, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.008059046860446482, "rewards/wordcountpos_reward/raw_geo/std": 0.19958827714430133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1168.0, "completions/mean_length": 1124.5, "completions/mean_terminated_length": 1070.857177734375, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.3680736147229446, "frac_reward_zero_std": 0.0, "grad_norm": 2.3860024329956464, "kl": 0.0117645263671875, "learning_rate": 8.17216073641326e-07, "loss": -0.0094, "num_tokens": 80094503.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9552346467971802, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07014013620540586, "rewards/wordcountpos_reward/raw_geo/std": 0.11240748096020631, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346314, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1117.75, "completions/mean_terminated_length": 1063.1429443359375, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.3682736547309462, "frac_reward_zero_std": 0.0, "grad_norm": 2.798661927667315, "kl": 0.014251708984375, "learning_rate": 8.169631789314304e-07, "loss": -0.0037, "num_tokens": 80128987.0, "reward": 0.0, "reward_std": 0.5800498127937317, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025800158249784544, "rewards/wordcountpos_reward/raw_geo/std": 0.05002555048519393, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1326.375, "completions/mean_terminated_length": 1301.571533203125, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.3684736947389478, "frac_reward_zero_std": 0.0, "grad_norm": 2.7011703308118733, "kl": 0.01255035400390625, "learning_rate": 8.167101540490765e-07, "loss": 0.0015, "num_tokens": 80186137.0, "reward": 0.0, "reward_std": 0.4640793800354004, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2221206158744349, "rewards/wordcountpos_reward/raw_geo/std": 0.27423727957163163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1105.0, "completions/mean_terminated_length": 1078.666748046875, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.36867373474694937, "frac_reward_zero_std": 0.0, "grad_norm": 2.710687970510179, "kl": 0.012603759765625, "learning_rate": 8.164569991176405e-07, "loss": 0.0025, "num_tokens": 80229001.0, "reward": 5.960464477539063e-08, "reward_std": 0.8225865364074707, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12277039445562876, "rewards/wordcountpos_reward/raw_geo/std": 0.15826646799275648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1136.0, "completions/mean_length": 987.5, "completions/mean_terminated_length": 953.3333740234375, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.368873774754951, "frac_reward_zero_std": 0.0, "grad_norm": 2.860698521947595, "kl": 0.0139617919921875, "learning_rate": 8.162037142605618e-07, "loss": 0.0304, "num_tokens": 80274153.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9617314338684082, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07883161230613318, "rewards/wordcountpos_reward/raw_geo/std": 0.061161064731399976, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982526, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1074.5, "completions/mean_terminated_length": 1074.5, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.3690738147629526, "frac_reward_zero_std": 0.0, "grad_norm": 3.749670466898167, "kl": 0.021209716796875, "learning_rate": 8.159502996013432e-07, "loss": 0.0282, "num_tokens": 80314889.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9347069263458252, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11362970597815501, "rewards/wordcountpos_reward/raw_geo/std": 0.08290296972841302, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1259.5625, "completions/mean_terminated_length": 1225.21435546875, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.3692738547709542, "frac_reward_zero_std": 0.0, "grad_norm": 2.4619994178552678, "kl": 0.01153564453125, "learning_rate": 8.156967552635507e-07, "loss": -0.0477, "num_tokens": 80349322.0, "reward": 0.0, "reward_std": 0.8978796005249023, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21797604515259808, "rewards/wordcountpos_reward/raw_geo/std": 0.06584706291386985, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1124.5, "completions/mean_terminated_length": 1099.4666748046875, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.3694738947789558, "frac_reward_zero_std": 0.0, "grad_norm": 2.8384266924612613, "kl": 0.0146026611328125, "learning_rate": 8.154430813708139e-07, "loss": 0.0388, "num_tokens": 80390146.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9611221551895142, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01323351065717172, "rewards/wordcountpos_reward/raw_geo/std": 0.051494527922501664, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 973.8125, "completions/mean_terminated_length": 973.8125, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.36967393478695737, "frac_reward_zero_std": 0.0, "grad_norm": 3.5800364932268693, "kl": 0.0137939453125, "learning_rate": 8.151892780468255e-07, "loss": 0.0432, "num_tokens": 80439639.0, "reward": -1.4901161193847656e-08, "reward_std": 0.932483434677124, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011671567667568904, "rewards/wordcountpos_reward/raw_geo/std": 0.11520379441484395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1094.5625, "completions/mean_terminated_length": 1094.5625, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.369873974794959, "frac_reward_zero_std": 0.0, "grad_norm": 3.369332224108131, "kl": 0.01531982421875, "learning_rate": 8.149353454153407e-07, "loss": -0.0268, "num_tokens": 80483816.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7892588376998901, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10674324255055707, "rewards/wordcountpos_reward/raw_geo/std": 0.10267537527623709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1099.125, "completions/mean_terminated_length": 1041.857177734375, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.3700740148029606, "frac_reward_zero_std": 0.0, "grad_norm": 3.133686382733146, "kl": 0.013427734375, "learning_rate": 8.146812836001785e-07, "loss": 0.0355, "num_tokens": 80520610.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7959136962890625, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07482712627769707, "rewards/wordcountpos_reward/raw_geo/std": 0.1138851042842639, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 999.3125, "completions/mean_terminated_length": 999.3125, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.3702740548109622, "frac_reward_zero_std": 0.0, "grad_norm": 3.462261724574105, "kl": 0.016815185546875, "learning_rate": 8.144270927252204e-07, "loss": 0.0059, "num_tokens": 80563279.0, "reward": 0.0, "reward_std": 0.8265146017074585, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03689876859721495, "rewards/wordcountpos_reward/raw_geo/std": 0.13863521092532205, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 952.9375, "completions/mean_terminated_length": 952.9375, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.3704740948189638, "frac_reward_zero_std": 0.0, "grad_norm": 3.333794047937921, "kl": 0.0162200927734375, "learning_rate": 8.141727729144112e-07, "loss": -0.0025, "num_tokens": 80594014.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7653108835220337, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2791406663851336, "rewards/wordcountpos_reward/raw_geo/std": 0.16561056645250977, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1268.875, "completions/mean_terminated_length": 1163.8182373046875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.3706741348269654, "frac_reward_zero_std": 0.0, "grad_norm": 2.4426261727639056, "kl": 0.0106658935546875, "learning_rate": 8.139183242917584e-07, "loss": -0.0181, "num_tokens": 80639532.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8818885087966919, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03210156053205511, "rewards/wordcountpos_reward/raw_geo/std": 0.19276358220791479, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 1069.125, "completions/mean_terminated_length": 1040.4000244140625, "completions/min_length": 710.0, "completions/min_terminated_length": 710.0, "epoch": 0.370874174834967, "frac_reward_zero_std": 0.0, "grad_norm": 3.462912751322825, "kl": 0.016387939453125, "learning_rate": 8.136637469813322e-07, "loss": 0.023, "num_tokens": 80687206.0, "reward": 0.0, "reward_std": 0.8476862907409668, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0008727951135355152, "rewards/wordcountpos_reward/raw_geo/std": 0.10408340425693237, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1127.9375, "completions/mean_terminated_length": 1127.9375, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.3710742148429686, "frac_reward_zero_std": 0.0, "grad_norm": 3.660521321383793, "kl": 0.017608642578125, "learning_rate": 8.134090411072658e-07, "loss": 0.0341, "num_tokens": 80729037.0, "reward": 2.60770320892334e-08, "reward_std": 0.9623128175735474, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009920104988550593, "rewards/wordcountpos_reward/raw_geo/std": 0.018840685589641703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666118, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1167.0, "completions/max_terminated_length": 1167.0, "completions/mean_length": 1025.1875, "completions/mean_terminated_length": 1025.1875, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.3712742548509702, "frac_reward_zero_std": 0.0, "grad_norm": 3.61923799356543, "kl": 0.01715087890625, "learning_rate": 8.131542067937548e-07, "loss": -0.025, "num_tokens": 80764208.0, "reward": 0.0, "reward_std": 1.0515928268432617, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1173617770513525, "rewards/wordcountpos_reward/raw_geo/std": 0.0839586337671737, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1382.125, "completions/mean_terminated_length": 1264.25, "completions/min_length": 1056.0, "completions/min_terminated_length": 1056.0, "epoch": 0.3714742948589718, "frac_reward_zero_std": 0.0, "grad_norm": 3.1547170307952532, "kl": 0.01800537109375, "learning_rate": 8.128992441650576e-07, "loss": 0.0075, "num_tokens": 80821066.0, "reward": 0.0, "reward_std": 0.5757333040237427, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06549123326091302, "rewards/wordcountpos_reward/raw_geo/std": 0.12149685215900144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1262.25, "completions/mean_terminated_length": 1183.0, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.3716743348669734, "frac_reward_zero_std": 0.0, "grad_norm": 3.3606634785890557, "kl": 0.0180816650390625, "learning_rate": 8.12644153345495e-07, "loss": 0.0378, "num_tokens": 80871526.0, "reward": 0.0, "reward_std": 0.9643378257751465, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2289128533987056, "rewards/wordcountpos_reward/raw_geo/std": 0.15675590382858848, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1268.6875, "completions/mean_terminated_length": 1253.2667236328125, "completions/min_length": 1119.0, "completions/min_terminated_length": 1119.0, "epoch": 0.371874374874975, "frac_reward_zero_std": 0.0, "grad_norm": 2.6221149231394136, "kl": 0.012451171875, "learning_rate": 8.123889344594509e-07, "loss": 0.0037, "num_tokens": 80915705.0, "reward": 0.0, "reward_std": 0.8176782131195068, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16005347872427947, "rewards/wordcountpos_reward/raw_geo/std": 0.07748663831624279, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1082.625, "completions/mean_terminated_length": 1082.625, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.3720744148829766, "frac_reward_zero_std": 0.0, "grad_norm": 3.138903452248144, "kl": 0.0123443603515625, "learning_rate": 8.121335876313706e-07, "loss": -0.0106, "num_tokens": 80955875.0, "reward": 0.0, "reward_std": 0.8863394856452942, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0692910885253618, "rewards/wordcountpos_reward/raw_geo/std": 0.06053219819904118, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1024.375, "completions/mean_terminated_length": 992.666748046875, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.3722744548909782, "frac_reward_zero_std": 0.0, "grad_norm": 2.9354629314553975, "kl": 0.0108489990234375, "learning_rate": 8.118781129857628e-07, "loss": -0.0141, "num_tokens": 80999209.0, "reward": 0.0, "reward_std": 0.756900429725647, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08557220608108208, "rewards/wordcountpos_reward/raw_geo/std": 0.08448798990737123, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1275.875, "completions/mean_terminated_length": 1275.875, "completions/min_length": 1108.0, "completions/min_terminated_length": 1108.0, "epoch": 0.37247449489897977, "frac_reward_zero_std": 0.0, "grad_norm": 2.4051441685279995, "kl": 0.009002685546875, "learning_rate": 8.116225106471978e-07, "loss": -0.0172, "num_tokens": 81037215.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9300907254219055, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05385873578666606, "rewards/wordcountpos_reward/raw_geo/std": 0.08224782029935941, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 961.8125, "completions/mean_terminated_length": 961.8125, "completions/min_length": 756.0, "completions/min_terminated_length": 756.0, "epoch": 0.3726745349069814, "frac_reward_zero_std": 0.0, "grad_norm": 3.518761209492957, "kl": 0.018524169921875, "learning_rate": 8.113667807403089e-07, "loss": -0.0051, "num_tokens": 81073156.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7832204103469849, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012379012150703632, "rewards/wordcountpos_reward/raw_geo/std": 0.07674561883460404, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1247.5, "completions/mean_terminated_length": 1247.5, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.372874574914983, "frac_reward_zero_std": 0.0, "grad_norm": 2.3484643951945947, "kl": 0.00971221923828125, "learning_rate": 8.111109233897906e-07, "loss": -0.0368, "num_tokens": 81112492.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6269381046295166, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.019791208422819635, "rewards/wordcountpos_reward/raw_geo/std": 0.1581129781355918, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1204.9375, "completions/mean_terminated_length": 1162.7857666015625, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.3730746149229846, "frac_reward_zero_std": 0.0, "grad_norm": 2.981603826177926, "kl": 0.0142669677734375, "learning_rate": 8.108549387204003e-07, "loss": -0.0261, "num_tokens": 81159467.0, "reward": -5.960464477539063e-08, "reward_std": 0.3111901581287384, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.000691979650867508, "rewards/wordcountpos_reward/raw_geo/std": 0.11500327837435816, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1172998689652263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1127.375, "completions/mean_terminated_length": 1127.375, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.3732746549309862, "frac_reward_zero_std": 0.0, "grad_norm": 3.2453506066105158, "kl": 0.0151214599609375, "learning_rate": 8.105988268569574e-07, "loss": 0.0199, "num_tokens": 81203633.0, "reward": 0.0, "reward_std": 0.9021538496017456, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07556178352899041, "rewards/wordcountpos_reward/raw_geo/std": 0.17826632855308816, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1315.625, "completions/mean_terminated_length": 1131.25, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.3734746949389878, "frac_reward_zero_std": 0.0, "grad_norm": 2.8131413864184305, "kl": 0.0121917724609375, "learning_rate": 8.103425879243434e-07, "loss": 0.0419, "num_tokens": 81255915.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7938408851623535, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.028796416093208238, "rewards/wordcountpos_reward/raw_geo/std": 0.09662106698154437, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1145.875, "completions/mean_terminated_length": 1145.875, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.3736747349469894, "frac_reward_zero_std": 0.0, "grad_norm": 3.0650565775564305, "kl": 0.01324462890625, "learning_rate": 8.100862220475012e-07, "loss": -0.0256, "num_tokens": 81290049.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0591579675674438, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17444215583776151, "rewards/wordcountpos_reward/raw_geo/std": 0.11300093737760178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 895.1875, "completions/mean_terminated_length": 895.1875, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.373874774954991, "frac_reward_zero_std": 0.0, "grad_norm": 4.220325569697498, "kl": 0.023956298828125, "learning_rate": 8.098297293514361e-07, "loss": 0.0157, "num_tokens": 81333420.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9887953996658325, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14887187830805024, "rewards/wordcountpos_reward/raw_geo/std": 0.06706316666557359, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.6333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.23975295927575427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1073.6875, "completions/mean_terminated_length": 1073.6875, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.3740748149629926, "frac_reward_zero_std": 0.0, "grad_norm": 3.763978521516728, "kl": 0.02178955078125, "learning_rate": 8.095731099612152e-07, "loss": 0.0547, "num_tokens": 81382175.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5390594005584717, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01783476792465052, "rewards/wordcountpos_reward/raw_geo/std": 0.10479206560531525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457553, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1051.625, "completions/mean_terminated_length": 1021.7333984375, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.3742748549709942, "frac_reward_zero_std": 0.0, "grad_norm": 3.8519201542175057, "kl": 0.0146636962890625, "learning_rate": 8.093163640019671e-07, "loss": -0.0041, "num_tokens": 81420329.0, "reward": -1.4901161193847656e-08, "reward_std": 1.068649411201477, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05412474011199657, "rewards/wordcountpos_reward/raw_geo/std": 0.05974750624523525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454343, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1197.0, "completions/mean_terminated_length": 1176.800048828125, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.3744748949789958, "frac_reward_zero_std": 0.0, "grad_norm": 2.525121655366482, "kl": 0.0125274658203125, "learning_rate": 8.090594915988823e-07, "loss": -0.0214, "num_tokens": 81464977.0, "reward": -7.450580596923828e-09, "reward_std": 1.052506685256958, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.050312543323886884, "rewards/wordcountpos_reward/raw_geo/std": 0.08035050714461821, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 1069.375, "completions/mean_terminated_length": 1069.375, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.3746749349869974, "frac_reward_zero_std": 0.0, "grad_norm": 3.186200183313209, "kl": 0.0144805908203125, "learning_rate": 8.088024928772133e-07, "loss": -0.032, "num_tokens": 81514199.0, "reward": 0.0, "reward_std": 0.8151905536651611, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.006231681982518772, "rewards/wordcountpos_reward/raw_geo/std": 0.11669170906464493, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1237.0, "completions/mean_terminated_length": 1219.4666748046875, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.374874974994999, "frac_reward_zero_std": 0.0, "grad_norm": 3.1189958831984264, "kl": 0.0176544189453125, "learning_rate": 8.085453679622733e-07, "loss": -0.0213, "num_tokens": 81559367.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0618932247161865, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019118860335724054, "rewards/wordcountpos_reward/raw_geo/std": 0.0851130398743582, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1307.8125, "completions/mean_terminated_length": 1243.75, "completions/min_length": 1031.0, "completions/min_terminated_length": 1031.0, "epoch": 0.3750750150030006, "frac_reward_zero_std": 0.0, "grad_norm": 3.2186249935994735, "kl": 0.01312255859375, "learning_rate": 8.08288116979438e-07, "loss": -0.0156, "num_tokens": 81612668.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0660264492034912, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08660032463575403, "rewards/wordcountpos_reward/raw_geo/std": 0.2848267154172795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1169.375, "completions/mean_terminated_length": 1169.375, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.3752750550110022, "frac_reward_zero_std": 0.0, "grad_norm": 2.522855626455326, "kl": 0.0129547119140625, "learning_rate": 8.080307400541438e-07, "loss": -0.0259, "num_tokens": 81663442.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9226261377334595, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03787609101500173, "rewards/wordcountpos_reward/raw_geo/std": 0.13690495238141798, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12285191326386659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1090.375, "completions/mean_terminated_length": 1031.857177734375, "completions/min_length": 633.0, "completions/min_terminated_length": 633.0, "epoch": 0.3754750950190038, "frac_reward_zero_std": 0.0, "grad_norm": 2.481631576153375, "kl": 0.00853729248046875, "learning_rate": 8.077732373118892e-07, "loss": -0.039, "num_tokens": 81714144.0, "reward": 0.0, "reward_std": 0.6557045578956604, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1161541012946108, "rewards/wordcountpos_reward/raw_geo/std": 0.1707889175429663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1122.9375, "completions/mean_terminated_length": 1069.071533203125, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.3756751350270054, "frac_reward_zero_std": 0.0, "grad_norm": 3.2440051727327015, "kl": 0.021240234375, "learning_rate": 8.075156088782336e-07, "loss": 0.0173, "num_tokens": 81756359.0, "reward": 0.0, "reward_std": 0.9885783195495605, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09043644444842076, "rewards/wordcountpos_reward/raw_geo/std": 0.17240085471737693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1344.375, "completions/mean_terminated_length": 1223.3333740234375, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.375875175035007, "frac_reward_zero_std": 0.0, "grad_norm": 3.3864190807251497, "kl": 0.02215576171875, "learning_rate": 8.072578548787977e-07, "loss": -0.0006, "num_tokens": 81806677.0, "reward": 0.0, "reward_std": 0.8474569916725159, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08331627754897028, "rewards/wordcountpos_reward/raw_geo/std": 0.07406014793765668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.161245154965971, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1395.1875, "completions/mean_terminated_length": 1290.375, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.3760752150430086, "frac_reward_zero_std": 0.0, "grad_norm": 2.969836669081187, "kl": 0.014556884765625, "learning_rate": 8.069999754392635e-07, "loss": -0.0172, "num_tokens": 81858560.0, "reward": 0.0, "reward_std": 0.6628202199935913, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23797623216303446, "rewards/wordcountpos_reward/raw_geo/std": 0.2661596562254852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1209.25, "completions/mean_terminated_length": 1189.86669921875, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.3762752550510102, "frac_reward_zero_std": 0.0, "grad_norm": 3.2050093578750936, "kl": 0.016571044921875, "learning_rate": 8.067419706853744e-07, "loss": -0.0001, "num_tokens": 81903196.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9129930734634399, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027261719297969597, "rewards/wordcountpos_reward/raw_geo/std": 0.06716088882655795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767716, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1045.75, "completions/mean_terminated_length": 1045.75, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.3764752950590118, "frac_reward_zero_std": 0.0, "grad_norm": 3.578605344271528, "kl": 0.0166168212890625, "learning_rate": 8.064838407429346e-07, "loss": -0.0055, "num_tokens": 81938656.0, "reward": 0.0, "reward_std": 0.7521936893463135, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08213157758908353, "rewards/wordcountpos_reward/raw_geo/std": 0.10115363309653723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14950535726806533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1220.875, "completions/mean_terminated_length": 1220.875, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.3766753350670134, "frac_reward_zero_std": 0.0, "grad_norm": 2.810146959634015, "kl": 0.0140838623046875, "learning_rate": 8.062255857378093e-07, "loss": 0.0195, "num_tokens": 81986622.0, "reward": 1.4901161193847656e-08, "reward_std": 1.032131314277649, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0015396827552956668, "rewards/wordcountpos_reward/raw_geo/std": 0.3054826164216197, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1236.875, "completions/mean_terminated_length": 1149.166748046875, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.376875375075015, "frac_reward_zero_std": 0.0, "grad_norm": 3.070952962145064, "kl": 0.0138702392578125, "learning_rate": 8.059672057959249e-07, "loss": -0.0011, "num_tokens": 82040596.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0383249521255493, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1995550056065262, "rewards/wordcountpos_reward/raw_geo/std": 0.21835264134876262, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1236.1875, "completions/mean_terminated_length": 1175.3077392578125, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.3770754150830166, "frac_reward_zero_std": 0.0, "grad_norm": 3.2556958644413214, "kl": 0.017822265625, "learning_rate": 8.057087010432686e-07, "loss": 0.0179, "num_tokens": 82093159.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9340291023254395, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0845777446405428, "rewards/wordcountpos_reward/raw_geo/std": 0.2351616726282149, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0909822937597079, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1417.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1100.25, "completions/mean_terminated_length": 1100.25, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.37727545509101823, "frac_reward_zero_std": 0.0, "grad_norm": 3.7600843921522267, "kl": 0.0196533203125, "learning_rate": 8.054500716058886e-07, "loss": 0.0105, "num_tokens": 82145347.0, "reward": 0.0, "reward_std": 0.8025621175765991, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09784720800465033, "rewards/wordcountpos_reward/raw_geo/std": 0.2004384300014111, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.0909822937597079, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1134.75, "completions/mean_terminated_length": 1110.4000244140625, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.3774754950990198, "frac_reward_zero_std": 0.0, "grad_norm": 2.889865735440464, "kl": 0.01061248779296875, "learning_rate": 8.051913176098937e-07, "loss": -0.0038, "num_tokens": 82187375.0, "reward": 0.0, "reward_std": 0.9804905652999878, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0034500681463277188, "rewards/wordcountpos_reward/raw_geo/std": 0.07566302851856088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05374838498865701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 1086.3125, "completions/mean_terminated_length": 1086.3125, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.3776755351070214, "frac_reward_zero_std": 0.0, "grad_norm": 3.5220808448269145, "kl": 0.021697998046875, "learning_rate": 8.049324391814534e-07, "loss": -0.0521, "num_tokens": 82229972.0, "reward": 0.0, "reward_std": 0.9319818019866943, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.37637222217987093, "rewards/wordcountpos_reward/raw_geo/std": 0.15539419960637904, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 1065.3125, "completions/mean_terminated_length": 1036.3333740234375, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.377875575115023, "frac_reward_zero_std": 0.0, "grad_norm": 3.566475860277026, "kl": 0.01519775390625, "learning_rate": 8.046734364467983e-07, "loss": 0.0214, "num_tokens": 82281929.0, "reward": 2.9802322387695312e-08, "reward_std": 0.821873128414154, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01184074625030777, "rewards/wordcountpos_reward/raw_geo/std": 0.099791638864631, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 970.9375, "completions/mean_terminated_length": 970.9375, "completions/min_length": 712.0, "completions/min_terminated_length": 712.0, "epoch": 0.3780756151230246, "frac_reward_zero_std": 0.0, "grad_norm": 3.644251450810969, "kl": 0.017333984375, "learning_rate": 8.044143095322191e-07, "loss": 0.0218, "num_tokens": 82322832.0, "reward": 0.0, "reward_std": 0.8006108403205872, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12139100008409892, "rewards/wordcountpos_reward/raw_geo/std": 0.1563176934500072, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1329.875, "completions/mean_terminated_length": 1227.800048828125, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.3782756551310262, "frac_reward_zero_std": 0.0, "grad_norm": 3.0586865433023194, "kl": 0.0140228271484375, "learning_rate": 8.041550585640672e-07, "loss": -0.0168, "num_tokens": 82370886.0, "reward": -5.960464477539063e-08, "reward_std": 0.5831820368766785, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19207673918592394, "rewards/wordcountpos_reward/raw_geo/std": 0.20628932244654444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.15962919996504865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1239.75, "completions/mean_terminated_length": 1202.571533203125, "completions/min_length": 1053.0, "completions/min_terminated_length": 1053.0, "epoch": 0.3784756951390278, "frac_reward_zero_std": 0.0, "grad_norm": 2.9939547043824777, "kl": 0.0130462646484375, "learning_rate": 8.038956836687548e-07, "loss": 0.0198, "num_tokens": 82414578.0, "reward": 0.0, "reward_std": 0.9306091070175171, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14981470939782926, "rewards/wordcountpos_reward/raw_geo/std": 0.05266120876393239, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857661, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1171.5, "completions/mean_terminated_length": 1124.571533203125, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.3786757351470294, "frac_reward_zero_std": 0.0, "grad_norm": 2.711848169448823, "kl": 0.01030731201171875, "learning_rate": 8.03636184972754e-07, "loss": 0.0654, "num_tokens": 82457890.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0340769290924072, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.057395746879333985, "rewards/wordcountpos_reward/raw_geo/std": 0.10822074335298774, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1213.0, "completions/mean_terminated_length": 1082.5455322265625, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.378875775155031, "frac_reward_zero_std": 0.0, "grad_norm": 2.443336326365855, "kl": 0.00922393798828125, "learning_rate": 8.033765626025977e-07, "loss": -0.0137, "num_tokens": 82506202.0, "reward": 0.0, "reward_std": 0.4957790970802307, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10059129430785817, "rewards/wordcountpos_reward/raw_geo/std": 0.08658025150727795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13877773329774218, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1118.1875, "completions/mean_terminated_length": 1118.1875, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.3790758151630326, "frac_reward_zero_std": 0.0, "grad_norm": 2.910752801774246, "kl": 0.01043701171875, "learning_rate": 8.03116816684879e-07, "loss": -0.0203, "num_tokens": 82547973.0, "reward": 7.450580596923828e-09, "reward_std": 1.0637421607971191, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07354794964001503, "rewards/wordcountpos_reward/raw_geo/std": 0.09675585555016318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1091.125, "completions/mean_terminated_length": 1063.86669921875, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.3792758551710342, "frac_reward_zero_std": 0.0, "grad_norm": 3.506860643230018, "kl": 0.019622802734375, "learning_rate": 8.028569473462509e-07, "loss": 0.0017, "num_tokens": 82580807.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0047712326049805, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054303418941168954, "rewards/wordcountpos_reward/raw_geo/std": 0.034672807493010945, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1012.875, "completions/mean_terminated_length": 1012.875, "completions/min_length": 624.0, "completions/min_terminated_length": 624.0, "epoch": 0.3794758951790358, "frac_reward_zero_std": 0.0, "grad_norm": 3.8299178980494855, "kl": 0.021484375, "learning_rate": 8.025969547134273e-07, "loss": 0.0149, "num_tokens": 82618805.0, "reward": 2.9802322387695312e-08, "reward_std": 0.831100344657898, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11396198132369267, "rewards/wordcountpos_reward/raw_geo/std": 0.10066201792974971, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1226.625, "completions/mean_terminated_length": 1226.625, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.3796759351870374, "frac_reward_zero_std": 0.0, "grad_norm": 3.2448259651169975, "kl": 0.0142669677734375, "learning_rate": 8.023368389131815e-07, "loss": 0.0322, "num_tokens": 82664807.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9694257974624634, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10596751817496983, "rewards/wordcountpos_reward/raw_geo/std": 0.34621208559483735, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1466.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1071.0, "completions/mean_terminated_length": 1071.0, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.379875975195039, "frac_reward_zero_std": 0.0, "grad_norm": 3.2259274038463164, "kl": 0.016845703125, "learning_rate": 8.020766000723471e-07, "loss": -0.042, "num_tokens": 82717111.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7183211445808411, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14948507660395502, "rewards/wordcountpos_reward/raw_geo/std": 0.29935519168844377, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1083.9375, "completions/mean_terminated_length": 1083.9375, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.38007601520304063, "frac_reward_zero_std": 0.0, "grad_norm": 3.747387611436037, "kl": 0.016876220703125, "learning_rate": 8.01816238317818e-07, "loss": 0.0468, "num_tokens": 82759254.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9764562845230103, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11053239657412174, "rewards/wordcountpos_reward/raw_geo/std": 0.15269617347572706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1144.75, "completions/mean_terminated_length": 1144.75, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.3802760552110422, "frac_reward_zero_std": 0.0, "grad_norm": 3.377289431117488, "kl": 0.022613525390625, "learning_rate": 8.015557537765475e-07, "loss": -0.0154, "num_tokens": 82810106.0, "reward": 0.0, "reward_std": 0.7665531039237976, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0014095033058528997, "rewards/wordcountpos_reward/raw_geo/std": 0.22012106481303725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1278.8125, "completions/mean_terminated_length": 1264.0667724609375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.3804760952190438, "frac_reward_zero_std": 0.0, "grad_norm": 3.2903890421268502, "kl": 0.01605224609375, "learning_rate": 8.012951465755493e-07, "loss": 0.0512, "num_tokens": 82864639.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8410732746124268, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015532918243747168, "rewards/wordcountpos_reward/raw_geo/std": 0.07347207971178882, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 956.375, "completions/mean_terminated_length": 920.1333618164062, "completions/min_length": 590.0, "completions/min_terminated_length": 590.0, "epoch": 0.3806761352270454, "frac_reward_zero_std": 0.0, "grad_norm": 4.096327591645464, "kl": 0.0208740234375, "learning_rate": 8.010344168418965e-07, "loss": -0.0183, "num_tokens": 82906685.0, "reward": 0.0, "reward_std": 0.8456340432167053, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12525115547120777, "rewards/wordcountpos_reward/raw_geo/std": 0.0786926357999723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1209.875, "completions/mean_terminated_length": 1209.875, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.380876175235047, "frac_reward_zero_std": 0.0, "grad_norm": 2.8117479246415122, "kl": 0.014495849609375, "learning_rate": 8.00773564702722e-07, "loss": 0.028, "num_tokens": 82948611.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8906652331352234, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.023887161527222603, "rewards/wordcountpos_reward/raw_geo/std": 0.08909066334246608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898336, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 924.1875, "completions/mean_terminated_length": 924.1875, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.38107621524304863, "frac_reward_zero_std": 0.0, "grad_norm": 2.4799648897793007, "kl": 0.0122833251953125, "learning_rate": 8.005125902852187e-07, "loss": -0.0128, "num_tokens": 82988510.0, "reward": 0.0, "reward_std": 0.7398412823677063, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0534892601617431, "rewards/wordcountpos_reward/raw_geo/std": 0.053296167831901234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337807, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1271.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1029.1875, "completions/mean_terminated_length": 1029.1875, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.3812762552510502, "frac_reward_zero_std": 0.0, "grad_norm": 3.840358329587029, "kl": 0.020843505859375, "learning_rate": 8.002514937166387e-07, "loss": -0.0113, "num_tokens": 83038641.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0676360130310059, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2009288174816079, "rewards/wordcountpos_reward/raw_geo/std": 0.28535041117784893, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1167.75, "completions/mean_terminated_length": 1167.75, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.3814762952590518, "frac_reward_zero_std": 0.0, "grad_norm": 3.6111888905532146, "kl": 0.018646240234375, "learning_rate": 7.999902751242942e-07, "loss": -0.0054, "num_tokens": 83077181.0, "reward": 0.0, "reward_std": 0.7873083353042603, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010181010317690024, "rewards/wordcountpos_reward/raw_geo/std": 0.12025182911661554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1273.5625, "completions/mean_terminated_length": 1137.7000732421875, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.3816763352670534, "frac_reward_zero_std": 0.0, "grad_norm": 2.6335557711050406, "kl": 0.010528564453125, "learning_rate": 7.997289346355562e-07, "loss": 0.0049, "num_tokens": 83130646.0, "reward": 0.0, "reward_std": 0.7262375354766846, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2436238092014079, "rewards/wordcountpos_reward/raw_geo/std": 0.39267478350127427, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0596284793999944, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1124.8125, "completions/mean_terminated_length": 1124.8125, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.381876375275055, "frac_reward_zero_std": 0.0, "grad_norm": 3.450853428056311, "kl": 0.01708984375, "learning_rate": 7.994674723778559e-07, "loss": 0.0041, "num_tokens": 83171731.0, "reward": 0.0, "reward_std": 0.7523471117019653, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0691691540357116, "rewards/wordcountpos_reward/raw_geo/std": 0.14948410592979197, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045819, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1204.0, "completions/max_terminated_length": 1204.0, "completions/mean_length": 1008.0, "completions/mean_terminated_length": 1008.0, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.38207641528305664, "frac_reward_zero_std": 0.0, "grad_norm": 3.2514580396190254, "kl": 0.0127716064453125, "learning_rate": 7.99205888478683e-07, "loss": -0.0037, "num_tokens": 83224363.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9400133490562439, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11794539773957492, "rewards/wordcountpos_reward/raw_geo/std": 0.04181931987584059, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1166.875, "completions/mean_terminated_length": 1144.666748046875, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.3822764552910582, "frac_reward_zero_std": 0.0, "grad_norm": 3.367947892053645, "kl": 0.015869140625, "learning_rate": 7.989441830655873e-07, "loss": 0.0071, "num_tokens": 83266737.0, "reward": -7.450580596923828e-09, "reward_std": 1.0459463596343994, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0356829687626893, "rewards/wordcountpos_reward/raw_geo/std": 0.12232737379716122, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1142.375, "completions/mean_terminated_length": 1118.533447265625, "completions/min_length": 569.0, "completions/min_terminated_length": 569.0, "epoch": 0.3824764952990598, "frac_reward_zero_std": 0.0, "grad_norm": 1.8571663561070442, "kl": 0.00499725341796875, "learning_rate": 7.986823562661776e-07, "loss": -0.072, "num_tokens": 83321543.0, "reward": -2.9802322387695312e-08, "reward_std": 0.680799126625061, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09314868544358086, "rewards/wordcountpos_reward/raw_geo/std": 0.051863610414294346, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1206.375, "completions/mean_terminated_length": 1186.800048828125, "completions/min_length": 1038.0, "completions/min_terminated_length": 1038.0, "epoch": 0.3826765353070614, "frac_reward_zero_std": 0.0, "grad_norm": 2.9906350833456017, "kl": 0.015838623046875, "learning_rate": 7.984204082081217e-07, "loss": 0.0018, "num_tokens": 83373909.0, "reward": 0.0, "reward_std": 0.6740381121635437, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.26119803744006964, "rewards/wordcountpos_reward/raw_geo/std": 0.4648392765067783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1218.8125, "completions/mean_terminated_length": 1178.6429443359375, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.38287657531506303, "frac_reward_zero_std": 0.0, "grad_norm": 3.423520409266596, "kl": 0.01885986328125, "learning_rate": 7.981583390191468e-07, "loss": -0.0093, "num_tokens": 83428282.0, "reward": 0.0, "reward_std": 0.7193698883056641, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05477221053843989, "rewards/wordcountpos_reward/raw_geo/std": 0.20083458612699479, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1210.625, "completions/mean_terminated_length": 1114.166748046875, "completions/min_length": 407.0, "completions/min_terminated_length": 407.0, "epoch": 0.38307661532306464, "frac_reward_zero_std": 0.0, "grad_norm": 3.2288554004054246, "kl": 0.0160675048828125, "learning_rate": 7.978961488270389e-07, "loss": -0.0348, "num_tokens": 83478780.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9395007491111755, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07076686328315875, "rewards/wordcountpos_reward/raw_geo/std": 0.13727354355278978, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1140.3125, "completions/mean_terminated_length": 1116.3333740234375, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.3832766553310662, "frac_reward_zero_std": 0.0, "grad_norm": 3.6249742575345674, "kl": 0.017059326171875, "learning_rate": 7.97633837759643e-07, "loss": -0.0431, "num_tokens": 83516481.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0335273742675781, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.029078230617636154, "rewards/wordcountpos_reward/raw_geo/std": 0.0537862839793503, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1094.875, "completions/mean_terminated_length": 1094.875, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.3834766953390678, "frac_reward_zero_std": 0.0, "grad_norm": 3.374077741996293, "kl": 0.0152587890625, "learning_rate": 7.973714059448634e-07, "loss": -0.0099, "num_tokens": 83566407.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9962517023086548, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05916642950267013, "rewards/wordcountpos_reward/raw_geo/std": 0.13146134105735469, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1186.0625, "completions/mean_terminated_length": 1186.0625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.3836767353470694, "frac_reward_zero_std": 0.0, "grad_norm": 2.0649748135730386, "kl": 0.0080718994140625, "learning_rate": 7.97108853510663e-07, "loss": -0.0155, "num_tokens": 83606872.0, "reward": -3.725290298461914e-09, "reward_std": 1.029812216758728, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07721551483535441, "rewards/wordcountpos_reward/raw_geo/std": 0.08075121500835286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1195.9375, "completions/mean_terminated_length": 1175.666748046875, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.38387677535507103, "frac_reward_zero_std": 0.0, "grad_norm": 3.1599189163991874, "kl": 0.015106201171875, "learning_rate": 7.968461805850635e-07, "loss": -0.0357, "num_tokens": 83649719.0, "reward": 1.862645149230957e-08, "reward_std": 1.005957841873169, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16293989953374052, "rewards/wordcountpos_reward/raw_geo/std": 0.09783115286378856, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1143.5, "completions/mean_terminated_length": 1119.7333984375, "completions/min_length": 779.0, "completions/min_terminated_length": 779.0, "epoch": 0.3840768153630726, "frac_reward_zero_std": 0.0, "grad_norm": 3.201375345409664, "kl": 0.017822265625, "learning_rate": 7.965833872961455e-07, "loss": 0.0142, "num_tokens": 83691879.0, "reward": -2.9802322387695312e-08, "reward_std": 0.794905424118042, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.293349978442748, "rewards/wordcountpos_reward/raw_geo/std": 0.12941395563903416, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1307.9375, "completions/mean_terminated_length": 1307.9375, "completions/min_length": 1110.0, "completions/min_terminated_length": 1110.0, "epoch": 0.3842768553710742, "frac_reward_zero_std": 0.0, "grad_norm": 2.9169026222099967, "kl": 0.01287841796875, "learning_rate": 7.963204737720481e-07, "loss": 0.0266, "num_tokens": 83727846.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0219136476516724, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04929161266486752, "rewards/wordcountpos_reward/raw_geo/std": 0.08873550372034363, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1097.125, "completions/mean_terminated_length": 1097.125, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.3844768953790758, "frac_reward_zero_std": 0.0, "grad_norm": 3.3889067133813082, "kl": 0.0196990966796875, "learning_rate": 7.960574401409693e-07, "loss": -0.0179, "num_tokens": 83771176.0, "reward": 4.470348358154297e-08, "reward_std": 1.0158910751342773, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1506740780249705, "rewards/wordcountpos_reward/raw_geo/std": 0.2791675303028947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1087.375, "completions/mean_terminated_length": 1059.86669921875, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.3846769353870774, "frac_reward_zero_std": 0.0, "grad_norm": 3.4753802954920094, "kl": 0.019073486328125, "learning_rate": 7.957942865311652e-07, "loss": 0.024, "num_tokens": 83813846.0, "reward": 0.0, "reward_std": 0.81082683801651, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11667962115645698, "rewards/wordcountpos_reward/raw_geo/std": 0.17090711079478307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1214.0, "completions/max_terminated_length": 1214.0, "completions/mean_length": 1023.25, "completions/mean_terminated_length": 1023.25, "completions/min_length": 702.0, "completions/min_terminated_length": 702.0, "epoch": 0.38487697539507904, "frac_reward_zero_std": 0.0, "grad_norm": 3.6915590412072863, "kl": 0.023468017578125, "learning_rate": 7.95531013070951e-07, "loss": -0.0138, "num_tokens": 83863090.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7343430519104004, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06907121137048625, "rewards/wordcountpos_reward/raw_geo/std": 0.09371167337310808, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1222.0, "completions/mean_terminated_length": 1055.2000732421875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.3850770154030806, "frac_reward_zero_std": 0.0, "grad_norm": 3.1274737177814584, "kl": 0.0158233642578125, "learning_rate": 7.952676198886997e-07, "loss": 0.0192, "num_tokens": 83912826.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0447074174880981, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15561142380306806, "rewards/wordcountpos_reward/raw_geo/std": 0.07617022697144082, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1351.875, "completions/mean_terminated_length": 1284.5455322265625, "completions/min_length": 1088.0, "completions/min_terminated_length": 1088.0, "epoch": 0.3852770554110822, "frac_reward_zero_std": 0.0, "grad_norm": 2.840321222105165, "kl": 0.0164947509765625, "learning_rate": 7.950041071128433e-07, "loss": -0.0318, "num_tokens": 83968232.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0447741746902466, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.088066999987892, "rewards/wordcountpos_reward/raw_geo/std": 0.0254594803666307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1140.9375, "completions/mean_terminated_length": 1140.9375, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.3854770954190838, "frac_reward_zero_std": 0.0, "grad_norm": 3.7069250903539834, "kl": 0.01629638671875, "learning_rate": 7.947404748718717e-07, "loss": 0.0183, "num_tokens": 84004031.0, "reward": 0.0, "reward_std": 0.6612793803215027, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07883841972233559, "rewards/wordcountpos_reward/raw_geo/std": 0.09162495810246825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1033.9375, "completions/mean_terminated_length": 1033.9375, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.3856771354270854, "frac_reward_zero_std": 0.0, "grad_norm": 2.9460353264656107, "kl": 0.0142059326171875, "learning_rate": 7.944767232943333e-07, "loss": 0.0391, "num_tokens": 84052886.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8299098610877991, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0011428826439418742, "rewards/wordcountpos_reward/raw_geo/std": 0.062149762707538206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066474, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1377.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1083.25, "completions/mean_terminated_length": 1083.25, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.38587717543508704, "frac_reward_zero_std": 0.0, "grad_norm": 3.5411879222662117, "kl": 0.023040771484375, "learning_rate": 7.942128525088344e-07, "loss": 0.034, "num_tokens": 84095522.0, "reward": 4.470348358154297e-08, "reward_std": 0.9512640237808228, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01015780803818505, "rewards/wordcountpos_reward/raw_geo/std": 0.12137534592592701, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1154.875, "completions/mean_terminated_length": 1105.571533203125, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.3860772154430886, "frac_reward_zero_std": 0.0, "grad_norm": 3.647945775186784, "kl": 0.0185394287109375, "learning_rate": 7.939488626440398e-07, "loss": -0.0045, "num_tokens": 84137736.0, "reward": 0.0, "reward_std": 0.7156025767326355, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.22226411035292845, "rewards/wordcountpos_reward/raw_geo/std": 0.21882135942235223, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10610965676722953, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1157.3125, "completions/mean_terminated_length": 1157.3125, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.3862772554510902, "frac_reward_zero_std": 0.0, "grad_norm": 2.975455875513827, "kl": 0.014312744140625, "learning_rate": 7.936847538286718e-07, "loss": -0.025, "num_tokens": 84185613.0, "reward": 0.0, "reward_std": 0.9196183681488037, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0779048186616783, "rewards/wordcountpos_reward/raw_geo/std": 0.13039687629287663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1078.8125, "completions/mean_terminated_length": 1078.8125, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.3864772954590918, "frac_reward_zero_std": 0.0, "grad_norm": 3.07950053345988, "kl": 0.0143280029296875, "learning_rate": 7.934205261915114e-07, "loss": 0.0185, "num_tokens": 84225122.0, "reward": 0.0, "reward_std": 1.0661547183990479, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05891146031039509, "rewards/wordcountpos_reward/raw_geo/std": 0.07466751504177721, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1210.125, "completions/mean_terminated_length": 1143.2308349609375, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.38667733546709343, "frac_reward_zero_std": 0.0, "grad_norm": 2.7950355832783096, "kl": 0.0130462646484375, "learning_rate": 7.931561798613972e-07, "loss": 0.0076, "num_tokens": 84277316.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0157172679901123, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02867748336574919, "rewards/wordcountpos_reward/raw_geo/std": 0.14086856255566693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1185.5, "completions/mean_terminated_length": 1080.666748046875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.38687737547509504, "frac_reward_zero_std": 0.0, "grad_norm": 3.3679682694342388, "kl": 0.01898193359375, "learning_rate": 7.928917149672254e-07, "loss": 0.0517, "num_tokens": 84328588.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9801058173179626, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11899428164145383, "rewards/wordcountpos_reward/raw_geo/std": 0.07336294543278063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1092.8125, "completions/mean_terminated_length": 1092.8125, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.3870774154830966, "frac_reward_zero_std": 0.0, "grad_norm": 3.2581363135207675, "kl": 0.020538330078125, "learning_rate": 7.926271316379505e-07, "loss": -0.0258, "num_tokens": 84377953.0, "reward": 0.0, "reward_std": 0.5870635509490967, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07762638823999722, "rewards/wordcountpos_reward/raw_geo/std": 0.14303668763566904, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1117.8125, "completions/mean_terminated_length": 1117.8125, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.3872774554910982, "frac_reward_zero_std": 0.0, "grad_norm": 3.259911931430322, "kl": 0.014923095703125, "learning_rate": 7.923624300025844e-07, "loss": -0.0218, "num_tokens": 84414310.0, "reward": 0.0, "reward_std": 0.8105592727661133, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.000733722517796763, "rewards/wordcountpos_reward/raw_geo/std": 0.06051314692325833, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1194.875, "completions/mean_terminated_length": 1194.875, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.3874774954990998, "frac_reward_zero_std": 0.0, "grad_norm": 2.6484300185103353, "kl": 0.0118560791015625, "learning_rate": 7.920976101901968e-07, "loss": -0.056, "num_tokens": 84463524.0, "reward": 0.0, "reward_std": 0.8934857845306396, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04211384414680419, "rewards/wordcountpos_reward/raw_geo/std": 0.2333173234249423, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1299.5625, "completions/mean_terminated_length": 1299.5625, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.38767753550710143, "frac_reward_zero_std": 0.0, "grad_norm": 2.786133816929239, "kl": 0.0147857666015625, "learning_rate": 7.918326723299154e-07, "loss": 0.0176, "num_tokens": 84513917.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0175589323043823, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14600860897973067, "rewards/wordcountpos_reward/raw_geo/std": 0.11890078592203776, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1096.6875, "completions/mean_terminated_length": 1096.6875, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.38787757551510305, "frac_reward_zero_std": 0.0, "grad_norm": 2.8142218958907725, "kl": 0.0144500732421875, "learning_rate": 7.915676165509248e-07, "loss": 0.0167, "num_tokens": 84553056.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4527949094772339, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1546822520687281, "rewards/wordcountpos_reward/raw_geo/std": 0.18175917391667706, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1204.1875, "completions/mean_terminated_length": 1161.9285888671875, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.3880776155231046, "frac_reward_zero_std": 0.0, "grad_norm": 3.47851492965573, "kl": 0.01708984375, "learning_rate": 7.913024429824672e-07, "loss": -0.0271, "num_tokens": 84600171.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7945734262466431, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03318640885596952, "rewards/wordcountpos_reward/raw_geo/std": 0.04347737846261447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1123.75, "completions/mean_terminated_length": 1070.0, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.3882776555311062, "frac_reward_zero_std": 0.0, "grad_norm": 3.533612689886028, "kl": 0.019317626953125, "learning_rate": 7.910371517538428e-07, "loss": 0.0015, "num_tokens": 84643039.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9811118841171265, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09676382869600902, "rewards/wordcountpos_reward/raw_geo/std": 0.10918625189944568, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504183, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1114.0, "completions/max_terminated_length": 1114.0, "completions/mean_length": 1023.6875, "completions/mean_terminated_length": 1023.6875, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.3884776955391078, "frac_reward_zero_std": 0.0, "grad_norm": 1.2882336552583928, "kl": 0.0042209625244140625, "learning_rate": 7.907717429944086e-07, "loss": 0.0001, "num_tokens": 84684970.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9430273771286011, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03151630712683913, "rewards/wordcountpos_reward/raw_geo/std": 0.0977104276587023, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10602235962635781, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1018.75, "completions/mean_terminated_length": 1018.75, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.38867773554710944, "frac_reward_zero_std": 0.0, "grad_norm": 3.6331558978513856, "kl": 0.0198974609375, "learning_rate": 7.905062168335794e-07, "loss": -0.0376, "num_tokens": 84716766.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6383984088897705, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14574582130289615, "rewards/wordcountpos_reward/raw_geo/std": 0.15150392951019787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 1943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1173.0, "completions/max_terminated_length": 1173.0, "completions/mean_length": 954.75, "completions/mean_terminated_length": 954.75, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.38887777555511105, "frac_reward_zero_std": 0.0, "grad_norm": 3.208395548761791, "kl": 0.0136566162109375, "learning_rate": 7.902405734008267e-07, "loss": 0.0035, "num_tokens": 84766058.0, "reward": -2.9802322387695312e-08, "reward_std": 0.645635724067688, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17979327969462922, "rewards/wordcountpos_reward/raw_geo/std": 0.21916599585048865, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1265.3125, "completions/mean_terminated_length": 1187.0833740234375, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.3890778155631126, "frac_reward_zero_std": 0.0, "grad_norm": 2.712189161298553, "kl": 0.0118865966796875, "learning_rate": 7.899748128256793e-07, "loss": 0.0129, "num_tokens": 84819799.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8285987973213196, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02933622415489038, "rewards/wordcountpos_reward/raw_geo/std": 0.060910642294256884, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1351.125, "completions/mean_terminated_length": 1351.125, "completions/min_length": 1206.0, "completions/min_terminated_length": 1206.0, "epoch": 0.3892778555711142, "frac_reward_zero_std": 0.0, "grad_norm": 2.769769274498029, "kl": 0.0123291015625, "learning_rate": 7.897089352377237e-07, "loss": 0.006, "num_tokens": 84866361.0, "reward": 0.0, "reward_std": 0.8065032958984375, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.027039899476719222, "rewards/wordcountpos_reward/raw_geo/std": 0.07483926024067868, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1019.75, "completions/mean_terminated_length": 1019.75, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.38947789557911583, "frac_reward_zero_std": 0.0, "grad_norm": 3.3684930118765517, "kl": 0.0181732177734375, "learning_rate": 7.894429407666024e-07, "loss": -0.0144, "num_tokens": 84907085.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6613297462463379, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06654591786236366, "rewards/wordcountpos_reward/raw_geo/std": 0.06413234359363085, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1286.0625, "completions/mean_terminated_length": 1236.6923828125, "completions/min_length": 1041.0, "completions/min_terminated_length": 1041.0, "epoch": 0.38967793558711744, "frac_reward_zero_std": 0.0, "grad_norm": 2.0636617240426474, "kl": 0.00885009765625, "learning_rate": 7.891768295420164e-07, "loss": -0.0116, "num_tokens": 84940470.0, "reward": -1.1175870895385742e-08, "reward_std": 0.9511070847511292, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10621054853011609, "rewards/wordcountpos_reward/raw_geo/std": 0.08070195674325308, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1204.25, "completions/mean_terminated_length": 1162.0, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.389877975595119, "frac_reward_zero_std": 0.0, "grad_norm": 3.4070034881137334, "kl": 0.0177459716796875, "learning_rate": 7.889106016937219e-07, "loss": 0.0022, "num_tokens": 84974690.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0560686588287354, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016030748797630112, "rewards/wordcountpos_reward/raw_geo/std": 0.058505419223668136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 973.4375, "completions/mean_terminated_length": 973.4375, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.3900780156031206, "frac_reward_zero_std": 0.0, "grad_norm": 3.62722112152376, "kl": 0.0143280029296875, "learning_rate": 7.886442573515333e-07, "loss": -0.0332, "num_tokens": 85017705.0, "reward": 0.0, "reward_std": 0.9516506195068359, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01808504943915968, "rewards/wordcountpos_reward/raw_geo/std": 0.1288500030165194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6875, "rewards/wordcountpos_reward/raw_rule/std": 0.15533714826025882, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1035.75, "completions/mean_terminated_length": 1035.75, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.3902780556111222, "frac_reward_zero_std": 0.0, "grad_norm": 3.3223812489862357, "kl": 0.013580322265625, "learning_rate": 7.88377796645321e-07, "loss": -0.0466, "num_tokens": 85057565.0, "reward": 0.0, "reward_std": 0.9915995597839355, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0885074400646694, "rewards/wordcountpos_reward/raw_geo/std": 0.07319635640649398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1110.25, "completions/mean_terminated_length": 1084.2667236328125, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.39047809561912383, "frac_reward_zero_std": 0.0, "grad_norm": 3.7246271712308556, "kl": 0.0206298828125, "learning_rate": 7.881112197050128e-07, "loss": -0.0254, "num_tokens": 85102025.0, "reward": 0.0, "reward_std": 0.6427106857299805, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010441763367497837, "rewards/wordcountpos_reward/raw_geo/std": 0.11485688084397902, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1129.5625, "completions/mean_terminated_length": 1104.86669921875, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.39067813562712544, "frac_reward_zero_std": 0.0, "grad_norm": 2.3488824988896155, "kl": 0.0091094970703125, "learning_rate": 7.878445266605926e-07, "loss": 0.0282, "num_tokens": 85146762.0, "reward": 0.0, "reward_std": 0.7387911677360535, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.015499180445664451, "rewards/wordcountpos_reward/raw_geo/std": 0.05106921887306669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1234.1875, "completions/mean_terminated_length": 1196.21435546875, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.390878175635127, "frac_reward_zero_std": 0.0, "grad_norm": 3.2971555362237144, "kl": 0.015350341796875, "learning_rate": 7.87577717642101e-07, "loss": 0.0105, "num_tokens": 85187941.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8785548210144043, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13525414674498734, "rewards/wordcountpos_reward/raw_geo/std": 0.3648798263242208, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 1032.375, "completions/mean_terminated_length": 965.5714721679688, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.3910782156431286, "frac_reward_zero_std": 0.0, "grad_norm": 3.2838820027219744, "kl": 0.015777587890625, "learning_rate": 7.873107927796356e-07, "loss": 0.0561, "num_tokens": 85233507.0, "reward": -2.9802322387695312e-08, "reward_std": 0.46401447057724, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18496761020927677, "rewards/wordcountpos_reward/raw_geo/std": 0.132386932920565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1259.875, "completions/mean_terminated_length": 1225.571533203125, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.3912782556511302, "frac_reward_zero_std": 0.0, "grad_norm": 2.905842591462001, "kl": 0.0129852294921875, "learning_rate": 7.8704375220335e-07, "loss": -0.0372, "num_tokens": 85287577.0, "reward": 0.0, "reward_std": 0.5848679542541504, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1104370115588768, "rewards/wordcountpos_reward/raw_geo/std": 0.08239590932674329, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 935.0, "completions/max_terminated_length": 935.0, "completions/mean_length": 773.625, "completions/mean_terminated_length": 773.625, "completions/min_length": 653.0, "completions/min_terminated_length": 653.0, "epoch": 0.39147829565913184, "frac_reward_zero_std": 0.0, "grad_norm": 3.331683825317068, "kl": 0.0155181884765625, "learning_rate": 7.867765960434543e-07, "loss": -0.0004, "num_tokens": 85326395.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6733416318893433, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004082887098857441, "rewards/wordcountpos_reward/raw_geo/std": 0.16360180557639162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1112221667221529, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1456.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1173.75, "completions/mean_terminated_length": 1173.75, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.39167833566713345, "frac_reward_zero_std": 0.0, "grad_norm": 3.3701315077128413, "kl": 0.017791748046875, "learning_rate": 7.865093244302153e-07, "loss": 0.0313, "num_tokens": 85363535.0, "reward": 0.0, "reward_std": 0.8786790370941162, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04053263159152689, "rewards/wordcountpos_reward/raw_geo/std": 0.14550624995661623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1181.0, "completions/max_terminated_length": 1181.0, "completions/mean_length": 1004.1875, "completions/mean_terminated_length": 1004.1875, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.391878375675135, "frac_reward_zero_std": 0.0, "grad_norm": 3.2847657188945036, "kl": 0.0160064697265625, "learning_rate": 7.862419374939559e-07, "loss": -0.0249, "num_tokens": 85392338.0, "reward": 0.0, "reward_std": 0.8219481706619263, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.060644614748349764, "rewards/wordcountpos_reward/raw_geo/std": 0.07700475023673792, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1369.0, "completions/mean_terminated_length": 1290.4000244140625, "completions/min_length": 1147.0, "completions/min_terminated_length": 1147.0, "epoch": 0.3920784156831366, "frac_reward_zero_std": 0.0, "grad_norm": 2.591051203650811, "kl": 0.0161285400390625, "learning_rate": 7.859744353650548e-07, "loss": -0.0008, "num_tokens": 85447922.0, "reward": 5.960464477539063e-08, "reward_std": 0.8923698663711548, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12203102061812558, "rewards/wordcountpos_reward/raw_geo/std": 0.04566537138325073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1092.0, "completions/max_terminated_length": 1092.0, "completions/mean_length": 999.375, "completions/mean_terminated_length": 999.375, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.39227845569113823, "frac_reward_zero_std": 0.0, "grad_norm": 3.27906364790894, "kl": 0.013641357421875, "learning_rate": 7.857068181739476e-07, "loss": -0.0111, "num_tokens": 85491416.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9789931774139404, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03943299986504064, "rewards/wordcountpos_reward/raw_geo/std": 0.09417395391686141, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1031.0625, "completions/mean_terminated_length": 1031.0625, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.39247849569913984, "frac_reward_zero_std": 0.0, "grad_norm": 3.126149456054329, "kl": 0.0142669677734375, "learning_rate": 7.854390860511255e-07, "loss": -0.0124, "num_tokens": 85530617.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0072274208068848, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08151374350387246, "rewards/wordcountpos_reward/raw_geo/std": 0.1187681007615068, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1146.375, "completions/mean_terminated_length": 1064.769287109375, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.39267853570714145, "frac_reward_zero_std": 0.0, "grad_norm": 3.178697301529944, "kl": 0.0169219970703125, "learning_rate": 7.851712391271359e-07, "loss": 0.0214, "num_tokens": 85582503.0, "reward": 2.2351741790771484e-08, "reward_std": 0.9279727339744568, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04226148927376202, "rewards/wordcountpos_reward/raw_geo/std": 0.05063945303439396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1148.1875, "completions/mean_terminated_length": 1148.1875, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.392878575715143, "frac_reward_zero_std": 0.0, "grad_norm": 3.165283861827048, "kl": 0.01641845703125, "learning_rate": 7.849032775325824e-07, "loss": -0.0184, "num_tokens": 85627618.0, "reward": -3.725290298461914e-09, "reward_std": 0.9582778215408325, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.10531646638070305, "rewards/wordcountpos_reward/raw_geo/std": 0.05513195879744111, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 972.9375, "completions/mean_terminated_length": 972.9375, "completions/min_length": 701.0, "completions/min_terminated_length": 701.0, "epoch": 0.3930786157231446, "frac_reward_zero_std": 0.0, "grad_norm": 3.6533349241015753, "kl": 0.0177764892578125, "learning_rate": 7.846352013981239e-07, "loss": -0.0008, "num_tokens": 85654105.0, "reward": -7.450580596923828e-09, "reward_std": 0.9160459637641907, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.041281719986851734, "rewards/wordcountpos_reward/raw_geo/std": 0.07520402981820258, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 1053.25, "completions/mean_terminated_length": 1053.25, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.39327865573114623, "frac_reward_zero_std": 0.0, "grad_norm": 3.0187150971238434, "kl": 0.0170135498046875, "learning_rate": 7.843670108544756e-07, "loss": -0.0331, "num_tokens": 85697989.0, "reward": 0.0, "reward_std": 0.8365430235862732, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13436313206221195, "rewards/wordcountpos_reward/raw_geo/std": 0.046601355610636476, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 1015.3125, "completions/mean_terminated_length": 1015.3125, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.39347869573914784, "frac_reward_zero_std": 0.0, "grad_norm": 3.529498743530727, "kl": 0.0175933837890625, "learning_rate": 7.840987060324089e-07, "loss": -0.0032, "num_tokens": 85729602.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9387333393096924, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02500254813345758, "rewards/wordcountpos_reward/raw_geo/std": 0.051729833395051696, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1294.75, "completions/mean_terminated_length": 1281.0667724609375, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.39367873574714946, "frac_reward_zero_std": 0.0, "grad_norm": 2.8931413053652353, "kl": 0.015594482421875, "learning_rate": 7.8383028706275e-07, "loss": -0.0115, "num_tokens": 85782862.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8478749990463257, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.039036036904426565, "rewards/wordcountpos_reward/raw_geo/std": 0.12657714601589817, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1043.75, "completions/mean_terminated_length": 1043.75, "completions/min_length": 695.0, "completions/min_terminated_length": 695.0, "epoch": 0.393878775755151, "frac_reward_zero_std": 0.0, "grad_norm": 3.4661177599841713, "kl": 0.0149688720703125, "learning_rate": 7.835617540763813e-07, "loss": 0.0093, "num_tokens": 85826954.0, "reward": -1.4901161193847656e-08, "reward_std": 1.000534176826477, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004058456368077229, "rewards/wordcountpos_reward/raw_geo/std": 0.047596678625114505, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1172.625, "completions/mean_terminated_length": 976.2000122070312, "completions/min_length": 778.0, "completions/min_terminated_length": 778.0, "epoch": 0.3940788157631526, "frac_reward_zero_std": 0.0, "grad_norm": 3.0060367334951774, "kl": 0.0099029541015625, "learning_rate": 7.832931072042408e-07, "loss": -0.0551, "num_tokens": 85873068.0, "reward": 0.0, "reward_std": 0.5061690807342529, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/std": 0.0, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1147.4375, "completions/mean_terminated_length": 987.1818237304688, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.39427885577115424, "frac_reward_zero_std": 0.0, "grad_norm": 2.6694401207468745, "kl": 0.013132095336914062, "learning_rate": 7.830243465773218e-07, "loss": 0.0079, "num_tokens": 85918627.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5675990581512451, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07759245990465403, "rewards/wordcountpos_reward/raw_geo/std": 0.09095469690428253, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1131.625, "completions/mean_terminated_length": 1079.0, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.39447889577915585, "frac_reward_zero_std": 0.0, "grad_norm": 3.762735668511541, "kl": 0.018341064453125, "learning_rate": 7.827554723266733e-07, "loss": 0.0094, "num_tokens": 85960429.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0607939958572388, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14659792675934105, "rewards/wordcountpos_reward/raw_geo/std": 0.2826092775064619, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125754, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1175.8125, "completions/mean_terminated_length": 1154.2000732421875, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.39467893578715746, "frac_reward_zero_std": 0.0, "grad_norm": 3.0543527230721215, "kl": 0.0151824951171875, "learning_rate": 7.824864845833995e-07, "loss": -0.0117, "num_tokens": 86004066.0, "reward": -7.450580596923828e-09, "reward_std": 0.9978142976760864, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.002552956104634882, "rewards/wordcountpos_reward/raw_geo/std": 0.16578180091000935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 959.1875, "completions/mean_terminated_length": 959.1875, "completions/min_length": 755.0, "completions/min_terminated_length": 755.0, "epoch": 0.394878975795159, "frac_reward_zero_std": 0.0, "grad_norm": 3.4221980911747063, "kl": 0.0134429931640625, "learning_rate": 7.822173834786602e-07, "loss": -0.0622, "num_tokens": 86042933.0, "reward": 2.9802322387695312e-08, "reward_std": 0.722364604473114, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24464280770314523, "rewards/wordcountpos_reward/raw_geo/std": 0.2696868176330769, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1141.125, "completions/mean_terminated_length": 1141.125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.3950790158031606, "frac_reward_zero_std": 0.0, "grad_norm": 2.825545300387118, "kl": 0.0166778564453125, "learning_rate": 7.819481691436702e-07, "loss": -0.0465, "num_tokens": 86090775.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0217972993850708, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.037414088196032355, "rewards/wordcountpos_reward/raw_geo/std": 0.04124504159552026, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1184.3125, "completions/mean_terminated_length": 1163.2667236328125, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.39527905581116224, "frac_reward_zero_std": 0.0, "grad_norm": 3.006456730337618, "kl": 0.0124969482421875, "learning_rate": 7.816788417096997e-07, "loss": -0.0109, "num_tokens": 86132172.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7910186052322388, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09900677930101995, "rewards/wordcountpos_reward/raw_geo/std": 0.07687970713092088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1006.0, "completions/mean_terminated_length": 935.4285888671875, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.39547909581916385, "frac_reward_zero_std": 0.0, "grad_norm": 3.000391794135069, "kl": 0.01032257080078125, "learning_rate": 7.814094013080739e-07, "loss": -0.017, "num_tokens": 86176556.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7196230888366699, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07898987385200562, "rewards/wordcountpos_reward/raw_geo/std": 0.0698906690964471, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1082.625, "completions/mean_terminated_length": 1054.800048828125, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.3956791358271654, "frac_reward_zero_std": 0.0, "grad_norm": 3.321515029077363, "kl": 0.015411376953125, "learning_rate": 7.811398480701733e-07, "loss": 0.04, "num_tokens": 86215646.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0556150674819946, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08417111795794797, "rewards/wordcountpos_reward/raw_geo/std": 0.031393966120172964, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1138.9375, "completions/mean_terminated_length": 1114.86669921875, "completions/min_length": 772.0, "completions/min_terminated_length": 772.0, "epoch": 0.395879175835167, "frac_reward_zero_std": 0.0, "grad_norm": 3.274345722627407, "kl": 0.017059326171875, "learning_rate": 7.80870182127433e-07, "loss": -0.0402, "num_tokens": 86267293.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9667720794677734, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10490092387904479, "rewards/wordcountpos_reward/raw_geo/std": 0.0997550169597763, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172842, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1203.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 975.9375, "completions/mean_terminated_length": 975.9375, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.39607921584316863, "frac_reward_zero_std": 0.0, "grad_norm": 2.9415402233272454, "kl": 0.014892578125, "learning_rate": 7.806004036113436e-07, "loss": -0.0419, "num_tokens": 86306220.0, "reward": 7.450580596923828e-09, "reward_std": 1.068274736404419, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11791751891020814, "rewards/wordcountpos_reward/raw_geo/std": 0.2901043462468264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1150.0, "completions/max_terminated_length": 1150.0, "completions/mean_length": 1030.5, "completions/mean_terminated_length": 1030.5, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.39627925585117024, "frac_reward_zero_std": 0.0, "grad_norm": 3.6772877641302166, "kl": 0.020477294921875, "learning_rate": 7.803305126534505e-07, "loss": -0.006, "num_tokens": 86338148.0, "reward": -2.9802322387695312e-08, "reward_std": 0.875225305557251, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1484228056040741, "rewards/wordcountpos_reward/raw_geo/std": 0.1545569673816577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1069.125, "completions/mean_terminated_length": 1069.125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.39647929585917185, "frac_reward_zero_std": 0.0, "grad_norm": 2.8270967489767704, "kl": 0.0222625732421875, "learning_rate": 7.800605093853533e-07, "loss": 0.032, "num_tokens": 86388526.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8502848744392395, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.033677224899846564, "rewards/wordcountpos_reward/raw_geo/std": 0.18292180713889253, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1077.6875, "completions/mean_terminated_length": 1049.533447265625, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.3966793358671734, "frac_reward_zero_std": 0.0, "grad_norm": 2.621222912724427, "kl": 0.017578125, "learning_rate": 7.797903939387071e-07, "loss": -0.0161, "num_tokens": 86431577.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0643887519836426, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0584300029922047, "rewards/wordcountpos_reward/raw_geo/std": 0.12150300693234975, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1287403358472941, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1120.6875, "completions/mean_terminated_length": 1120.6875, "completions/min_length": 701.0, "completions/min_terminated_length": 701.0, "epoch": 0.396879375875175, "frac_reward_zero_std": 0.0, "grad_norm": 3.316812044703328, "kl": 0.0160369873046875, "learning_rate": 7.795201664452215e-07, "loss": 0.0026, "num_tokens": 86474628.0, "reward": 0.0, "reward_std": 0.7597593069076538, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17380992919918895, "rewards/wordcountpos_reward/raw_geo/std": 0.13096476613901295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1391.75, "completions/mean_terminated_length": 1342.5455322265625, "completions/min_length": 1217.0, "completions/min_terminated_length": 1217.0, "epoch": 0.39707941588317663, "frac_reward_zero_std": 0.0, "grad_norm": 3.0245341290333974, "kl": 0.016387939453125, "learning_rate": 7.792498270366603e-07, "loss": 0.0032, "num_tokens": 86532848.0, "reward": 0.0, "reward_std": 0.938235878944397, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02542469493238682, "rewards/wordcountpos_reward/raw_geo/std": 0.07577014948746762, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1256.25, "completions/mean_terminated_length": 1240.0001220703125, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.39727945589117825, "frac_reward_zero_std": 0.0, "grad_norm": 2.874932738678797, "kl": 0.0143890380859375, "learning_rate": 7.789793758448425e-07, "loss": -0.0165, "num_tokens": 86578980.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8410855531692505, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03504135166237747, "rewards/wordcountpos_reward/raw_geo/std": 0.04493807432453479, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05947299418254506, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1167.75, "completions/mean_terminated_length": 1120.2857666015625, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.39747949589917986, "frac_reward_zero_std": 0.0, "grad_norm": 3.4508761642315715, "kl": 0.01910400390625, "learning_rate": 7.787088130016413e-07, "loss": -0.0205, "num_tokens": 86630328.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8712863922119141, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2989933843555322, "rewards/wordcountpos_reward/raw_geo/std": 0.061886168635224344, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1167.25, "completions/mean_terminated_length": 1167.25, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.3976795359071814, "frac_reward_zero_std": 0.0, "grad_norm": 3.386146320953828, "kl": 0.0154876708984375, "learning_rate": 7.784381386389842e-07, "loss": 0.0064, "num_tokens": 86672260.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6728423833847046, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08227600392823269, "rewards/wordcountpos_reward/raw_geo/std": 0.05541545232427812, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1161.3125, "completions/mean_terminated_length": 1138.7333984375, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.397879575915183, "frac_reward_zero_std": 0.0, "grad_norm": 2.546803036867779, "kl": 0.01361083984375, "learning_rate": 7.781673528888536e-07, "loss": -0.0202, "num_tokens": 86717457.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0329099893569946, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0887698562143239, "rewards/wordcountpos_reward/raw_geo/std": 0.20679781615560397, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1214.375, "completions/mean_terminated_length": 1148.4615478515625, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.39807961592318464, "frac_reward_zero_std": 0.0, "grad_norm": 3.2231077577448985, "kl": 0.0168304443359375, "learning_rate": 7.778964558832855e-07, "loss": -0.0624, "num_tokens": 86771127.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7980071306228638, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.033462326091415195, "rewards/wordcountpos_reward/raw_geo/std": 0.3019643714403689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 966.0625, "completions/mean_terminated_length": 966.0625, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.39827965593118625, "frac_reward_zero_std": 0.0, "grad_norm": 2.9597762319378003, "kl": 0.015228271484375, "learning_rate": 7.776254477543706e-07, "loss": -0.0345, "num_tokens": 86812520.0, "reward": 0.0, "reward_std": 0.8864554166793823, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15404831626704482, "rewards/wordcountpos_reward/raw_geo/std": 0.2305913883800116, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1146.5625, "completions/mean_terminated_length": 1123.0, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.39847969593918786, "frac_reward_zero_std": 0.0, "grad_norm": 3.539974949529452, "kl": 0.01519775390625, "learning_rate": 7.77354328634254e-07, "loss": 0.0054, "num_tokens": 86845473.0, "reward": 0.0, "reward_std": 0.9918215274810791, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/std": 0.0, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792518, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1194.4375, "completions/mean_terminated_length": 1150.7857666015625, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.3986797359471894, "frac_reward_zero_std": 0.0, "grad_norm": 3.158571610173031, "kl": 0.014923095703125, "learning_rate": 7.770830986551341e-07, "loss": 0.0446, "num_tokens": 86888088.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9001962542533875, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1166857819423355, "rewards/wordcountpos_reward/raw_geo/std": 0.07054107705439644, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.13984117975602023, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1174.8125, "completions/mean_terminated_length": 1066.416748046875, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.39887977595519103, "frac_reward_zero_std": 0.0, "grad_norm": 3.142714087899282, "kl": 0.0154571533203125, "learning_rate": 7.768117579492643e-07, "loss": -0.0181, "num_tokens": 86943621.0, "reward": 3.725290298461914e-09, "reward_std": 1.0066262483596802, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.01813017647922831, "rewards/wordcountpos_reward/raw_geo/std": 0.11261853532734675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1377.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1160.625, "completions/mean_terminated_length": 1160.625, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.39907981596319264, "frac_reward_zero_std": 0.0, "grad_norm": 3.673707740666206, "kl": 0.0228271484375, "learning_rate": 7.765403066489513e-07, "loss": 0.0398, "num_tokens": 86991311.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9432878494262695, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08486769472371088, "rewards/wordcountpos_reward/raw_geo/std": 0.14940668347893782, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 1995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1175.75, "completions/mean_terminated_length": 1154.1334228515625, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.39927985597119425, "frac_reward_zero_std": 0.0, "grad_norm": 2.5720892885089075, "kl": 0.01214599609375, "learning_rate": 7.762687448865561e-07, "loss": -0.0178, "num_tokens": 87030875.0, "reward": -2.2351741790771484e-08, "reward_std": 1.053713083267212, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.045502518534684094, "rewards/wordcountpos_reward/raw_geo/std": 0.058434464374287634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1179.125, "completions/mean_terminated_length": 1179.125, "completions/min_length": 661.0, "completions/min_terminated_length": 661.0, "epoch": 0.39947989597919586, "frac_reward_zero_std": 0.0, "grad_norm": 3.076459365274049, "kl": 0.0154571533203125, "learning_rate": 7.759970727944935e-07, "loss": -0.006, "num_tokens": 87077357.0, "reward": 0.0, "reward_std": 0.9475910663604736, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0480510984401983, "rewards/wordcountpos_reward/raw_geo/std": 0.09420258603292415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1267.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1071.875, "completions/mean_terminated_length": 1071.875, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.3996799359871974, "frac_reward_zero_std": 0.0, "grad_norm": 3.272286767549367, "kl": 0.018524169921875, "learning_rate": 7.757252905052318e-07, "loss": -0.0004, "num_tokens": 87118083.0, "reward": 0.0, "reward_std": 0.4661349654197693, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04574095976598698, "rewards/wordcountpos_reward/raw_geo/std": 0.13979497245116046, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1169.125, "completions/mean_terminated_length": 1169.125, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.39987997599519903, "frac_reward_zero_std": 0.0, "grad_norm": 1.9610670701878443, "kl": 0.00823974609375, "learning_rate": 7.754533981512936e-07, "loss": -0.0044, "num_tokens": 87163789.0, "reward": 0.0, "reward_std": 0.8742091655731201, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07824254678663017, "rewards/wordcountpos_reward/raw_geo/std": 0.08426140337045179, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 1999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1190.375, "completions/mean_terminated_length": 1190.375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.40008001600320064, "frac_reward_zero_std": 0.0, "grad_norm": 2.771438603318052, "kl": 0.0152435302734375, "learning_rate": 7.751813958652548e-07, "loss": -0.0311, "num_tokens": 87205835.0, "reward": 7.450580596923828e-09, "reward_std": 1.0564095973968506, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.01487273727736594, "rewards/wordcountpos_reward/raw_geo/std": 0.11694311570860827, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 949.3125, "completions/mean_terminated_length": 949.3125, "completions/min_length": 651.0, "completions/min_terminated_length": 651.0, "epoch": 0.40028005601120226, "frac_reward_zero_std": 0.0, "grad_norm": 3.5424837599374954, "kl": 0.0196380615234375, "learning_rate": 7.749092837797446e-07, "loss": 0.0235, "num_tokens": 87247352.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9682412147521973, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0529065195002808, "rewards/wordcountpos_reward/raw_geo/std": 0.06393367777017527, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 995.75, "completions/mean_terminated_length": 995.75, "completions/min_length": 688.0, "completions/min_terminated_length": 688.0, "epoch": 0.40048009601920387, "frac_reward_zero_std": 0.0, "grad_norm": 2.8319433203000663, "kl": 0.0115509033203125, "learning_rate": 7.746370620274465e-07, "loss": 0.0292, "num_tokens": 87284100.0, "reward": 0.0, "reward_std": 0.459858238697052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14216111604155723, "rewards/wordcountpos_reward/raw_geo/std": 0.16840512124138457, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1275.4375, "completions/mean_terminated_length": 1173.3636474609375, "completions/min_length": 954.0, "completions/min_terminated_length": 954.0, "epoch": 0.4006801360272054, "frac_reward_zero_std": 0.0, "grad_norm": 2.7379636970105667, "kl": 0.013427734375, "learning_rate": 7.743647307410969e-07, "loss": 0.0031, "num_tokens": 87322283.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9399663209915161, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012975837123238644, "rewards/wordcountpos_reward/raw_geo/std": 0.16570775966048343, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1146.375, "completions/mean_terminated_length": 1146.375, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.40088017603520704, "frac_reward_zero_std": 0.0, "grad_norm": 3.3551793435105486, "kl": 0.0163116455078125, "learning_rate": 7.740922900534856e-07, "loss": -0.0036, "num_tokens": 87356729.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0087049007415771, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09683709289776919, "rewards/wordcountpos_reward/raw_geo/std": 0.07471672858507064, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1117.0625, "completions/mean_terminated_length": 1091.533447265625, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.40108021604320865, "frac_reward_zero_std": 0.0, "grad_norm": 3.026588609175334, "kl": 0.0114593505859375, "learning_rate": 7.738197400974564e-07, "loss": 0.0295, "num_tokens": 87404770.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9784857034683228, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.4242049542659071, "rewards/wordcountpos_reward/raw_geo/std": 0.19599389589433053, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1106.375, "completions/mean_terminated_length": 1106.375, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.40128025605121026, "frac_reward_zero_std": 0.0, "grad_norm": 3.121991685182897, "kl": 0.0133514404296875, "learning_rate": 7.735470810059053e-07, "loss": -0.0208, "num_tokens": 87452928.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9602751731872559, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13798971666912754, "rewards/wordcountpos_reward/raw_geo/std": 0.06950985594843091, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1242.375, "completions/mean_terminated_length": 1205.571533203125, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.4014802960592118, "frac_reward_zero_std": 0.0, "grad_norm": 3.3719376878165566, "kl": 0.018707275390625, "learning_rate": 7.732743129117824e-07, "loss": -0.0308, "num_tokens": 87502038.0, "reward": 1.862645149230957e-08, "reward_std": 1.0413663387298584, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1170150926005097, "rewards/wordcountpos_reward/raw_geo/std": 0.2695528290375277, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1149.8125, "completions/mean_terminated_length": 939.7000122070312, "completions/min_length": 688.0, "completions/min_terminated_length": 688.0, "epoch": 0.40168033606721343, "frac_reward_zero_std": 0.0, "grad_norm": 3.2200632865657237, "kl": 0.019134521484375, "learning_rate": 7.730014359480907e-07, "loss": -0.0885, "num_tokens": 87545019.0, "reward": -7.450580596923828e-09, "reward_std": 1.0586961507797241, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.14060983980338745, "rewards/wordcountpos_reward/raw_geo/std": 0.23461068965993495, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1302.3125, "completions/mean_terminated_length": 1212.45458984375, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.40188037607521504, "frac_reward_zero_std": 0.0, "grad_norm": 3.1261668582580775, "kl": 0.01568603515625, "learning_rate": 7.727284502478863e-07, "loss": 0.0112, "num_tokens": 87593984.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9241052865982056, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04575007723610398, "rewards/wordcountpos_reward/raw_geo/std": 0.048677250024858135, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1172.1875, "completions/mean_terminated_length": 1172.1875, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.40208041608321665, "frac_reward_zero_std": 0.0, "grad_norm": 3.261402970448502, "kl": 0.0154571533203125, "learning_rate": 7.724553559442781e-07, "loss": -0.0173, "num_tokens": 87642443.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5764257311820984, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1315520100238666, "rewards/wordcountpos_reward/raw_geo/std": 0.10385666474010034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2010 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1165.5625, "completions/mean_terminated_length": 1143.2667236328125, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.40228045609121826, "frac_reward_zero_std": 0.0, "grad_norm": 2.918703054887695, "kl": 0.0121002197265625, "learning_rate": 7.721821531704283e-07, "loss": -0.053, "num_tokens": 87683100.0, "reward": 0.0, "reward_std": 0.7176514863967896, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2533124261077197, "rewards/wordcountpos_reward/raw_geo/std": 0.22649675183619028, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0768596604689834, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1252.0625, "completions/mean_terminated_length": 1235.533447265625, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.4024804960992198, "frac_reward_zero_std": 0.0, "grad_norm": 3.2716350813914197, "kl": 0.0240478515625, "learning_rate": 7.719088420595516e-07, "loss": -0.0374, "num_tokens": 87734237.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7990478277206421, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17911789470821474, "rewards/wordcountpos_reward/raw_geo/std": 0.30493939745141163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15104573749303493, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1090.0, "completions/max_terminated_length": 1090.0, "completions/mean_length": 793.1875, "completions/mean_terminated_length": 793.1875, "completions/min_length": 281.0, "completions/min_terminated_length": 281.0, "epoch": 0.40268053610722143, "frac_reward_zero_std": 0.0, "grad_norm": 3.4870374886085167, "kl": 0.0151824951171875, "learning_rate": 7.716354227449159e-07, "loss": -0.1263, "num_tokens": 87772584.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6382958889007568, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.007047952449453575, "rewards/wordcountpos_reward/raw_geo/std": 0.06402128728588029, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0709720863229836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1183.3125, "completions/mean_terminated_length": 1183.3125, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.40288057611522304, "frac_reward_zero_std": 0.0, "grad_norm": 2.0101676334940155, "kl": 0.00923919677734375, "learning_rate": 7.713618953598415e-07, "loss": -0.0377, "num_tokens": 87815741.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0390887260437012, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.034087998917174685, "rewards/wordcountpos_reward/raw_geo/std": 0.22236871248726692, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1232.1875, "completions/mean_terminated_length": 1193.9285888671875, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.40308061612322466, "frac_reward_zero_std": 0.0, "grad_norm": 3.2376242292222543, "kl": 0.0155792236328125, "learning_rate": 7.710882600377019e-07, "loss": -0.0056, "num_tokens": 87872112.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9404686093330383, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05536136053337536, "rewards/wordcountpos_reward/raw_geo/std": 0.06794541813914243, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0926962382871743, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1263.3125, "completions/mean_terminated_length": 1229.5, "completions/min_length": 1066.0, "completions/min_terminated_length": 1066.0, "epoch": 0.40328065613122627, "frac_reward_zero_std": 0.0, "grad_norm": 3.1891931442590136, "kl": 0.0175323486328125, "learning_rate": 7.708145169119228e-07, "loss": -0.0075, "num_tokens": 87917645.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9398343563079834, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026015492649178056, "rewards/wordcountpos_reward/raw_geo/std": 0.08755878163167685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1117.5, "completions/mean_terminated_length": 1117.5, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.4034806961392278, "frac_reward_zero_std": 0.0, "grad_norm": 2.573449196600652, "kl": 0.0133209228515625, "learning_rate": 7.705406661159823e-07, "loss": -0.0397, "num_tokens": 87960237.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9650349617004395, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03793046237251892, "rewards/wordcountpos_reward/raw_geo/std": 0.05386961262812178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14375905768565217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1184.6875, "completions/mean_terminated_length": 1163.666748046875, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.40368073614722944, "frac_reward_zero_std": 0.0, "grad_norm": 3.3661274733673205, "kl": 0.018280029296875, "learning_rate": 7.702667077834119e-07, "loss": -0.0243, "num_tokens": 88008680.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9741467237472534, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04225078337615724, "rewards/wordcountpos_reward/raw_geo/std": 0.10295231765597766, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1347.875, "completions/mean_terminated_length": 1256.5999755859375, "completions/min_length": 1047.0, "completions/min_terminated_length": 1047.0, "epoch": 0.40388077615523105, "frac_reward_zero_std": 0.0, "grad_norm": 2.6620084802184114, "kl": 0.014617919921875, "learning_rate": 7.699926420477944e-07, "loss": 0.0015, "num_tokens": 88055566.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8342624306678772, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.050291808811605505, "rewards/wordcountpos_reward/raw_geo/std": 0.16801869726121224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0958393717904348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1125.875, "completions/mean_terminated_length": 1125.875, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.40408081616323266, "frac_reward_zero_std": 0.0, "grad_norm": 2.683707778625537, "kl": 0.013336181640625, "learning_rate": 7.697184690427656e-07, "loss": -0.0502, "num_tokens": 88096860.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0109002590179443, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.050527221810071084, "rewards/wordcountpos_reward/raw_geo/std": 0.04788138737786651, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1097.75, "completions/mean_terminated_length": 1070.933349609375, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.40428085617123427, "frac_reward_zero_std": 0.0, "grad_norm": 2.5523300848368207, "kl": 0.0095977783203125, "learning_rate": 7.694441889020136e-07, "loss": -0.0086, "num_tokens": 88129224.0, "reward": 2.9802322387695312e-08, "reward_std": 0.3303879499435425, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026740427492668417, "rewards/wordcountpos_reward/raw_geo/std": 0.2017479797867654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1083.0, "completions/mean_terminated_length": 1083.0, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.4044808961792358, "frac_reward_zero_std": 0.0, "grad_norm": 4.022802154083134, "kl": 0.0177001953125, "learning_rate": 7.691698017592787e-07, "loss": -0.044, "num_tokens": 88170336.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8398323655128479, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0787157712589992, "rewards/wordcountpos_reward/raw_geo/std": 0.07485153255060317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1253.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 951.5, "completions/mean_terminated_length": 951.5, "completions/min_length": 665.0, "completions/min_terminated_length": 665.0, "epoch": 0.40468093618723744, "frac_reward_zero_std": 0.0, "grad_norm": 3.529893635453417, "kl": 0.0144500732421875, "learning_rate": 7.688953077483531e-07, "loss": -0.0424, "num_tokens": 88197944.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0359265804290771, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008683578057784283, "rewards/wordcountpos_reward/raw_geo/std": 0.14714482830760922, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.14291929864761416, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1263.1875, "completions/mean_terminated_length": 1263.1875, "completions/min_length": 1122.0, "completions/min_terminated_length": 1122.0, "epoch": 0.40488097619523905, "frac_reward_zero_std": 0.0, "grad_norm": 3.254976096468187, "kl": 0.0175628662109375, "learning_rate": 7.686207070030816e-07, "loss": -0.0114, "num_tokens": 88244955.0, "reward": 0.0, "reward_std": 0.8881120681762695, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10074038578232664, "rewards/wordcountpos_reward/raw_geo/std": 0.11450299358547654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 970.6875, "completions/mean_terminated_length": 970.6875, "completions/min_length": 645.0, "completions/min_terminated_length": 645.0, "epoch": 0.40508101620324066, "frac_reward_zero_std": 0.0, "grad_norm": 3.302863040698689, "kl": 0.0177001953125, "learning_rate": 7.683459996573606e-07, "loss": 0.0127, "num_tokens": 88287758.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5573809742927551, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10347736332590747, "rewards/wordcountpos_reward/raw_geo/std": 0.22965461948647736, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 915.4375, "completions/mean_terminated_length": 915.4375, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.4052810562112423, "frac_reward_zero_std": 0.0, "grad_norm": 3.4262998380742125, "kl": 0.016845703125, "learning_rate": 7.680711858451386e-07, "loss": -0.0214, "num_tokens": 88328501.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5690656900405884, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05071751960228229, "rewards/wordcountpos_reward/raw_geo/std": 0.16112665048400482, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1209.375, "completions/mean_terminated_length": 1112.5, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.40548109621924383, "frac_reward_zero_std": 0.0, "grad_norm": 2.8500007973060284, "kl": 0.01018524169921875, "learning_rate": 7.677962657004163e-07, "loss": -0.0051, "num_tokens": 88374099.0, "reward": 7.450580596923828e-09, "reward_std": 1.0380803346633911, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.033187844267933114, "rewards/wordcountpos_reward/raw_geo/std": 0.08607501191682741, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2027 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1043.375, "completions/mean_terminated_length": 1043.375, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.40568113622724544, "frac_reward_zero_std": 0.0, "grad_norm": 3.8268179821267037, "kl": 0.02008056640625, "learning_rate": 7.675212393572458e-07, "loss": -0.018, "num_tokens": 88421929.0, "reward": 0.0, "reward_std": 0.994317889213562, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09346816370366212, "rewards/wordcountpos_reward/raw_geo/std": 0.053421809005386865, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1182.8125, "completions/mean_terminated_length": 1182.8125, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.40588117623524705, "frac_reward_zero_std": 0.0, "grad_norm": 3.6822445721987886, "kl": 0.020355224609375, "learning_rate": 7.672461069497311e-07, "loss": 0.0008, "num_tokens": 88473790.0, "reward": 7.450580596923828e-09, "reward_std": 1.045255422592163, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.12597461277893662, "rewards/wordcountpos_reward/raw_geo/std": 0.0695546083523287, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729777, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1157.125, "completions/mean_terminated_length": 1157.125, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.40608121624324867, "frac_reward_zero_std": 0.0, "grad_norm": 2.6648191144722886, "kl": 0.0130157470703125, "learning_rate": 7.669708686120282e-07, "loss": -0.0174, "num_tokens": 88520520.0, "reward": 0.0, "reward_std": 0.6994332075119019, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053039662927343426, "rewards/wordcountpos_reward/raw_geo/std": 0.06071828529019722, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2030 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1046.5, "completions/mean_terminated_length": 1016.2667236328125, "completions/min_length": 784.0, "completions/min_terminated_length": 784.0, "epoch": 0.4062812562512502, "frac_reward_zero_std": 0.0, "grad_norm": 3.7546123453008176, "kl": 0.01690673828125, "learning_rate": 7.666955244783446e-07, "loss": -0.0021, "num_tokens": 88571040.0, "reward": 0.0, "reward_std": 0.7088227868080139, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0169050756431196, "rewards/wordcountpos_reward/raw_geo/std": 0.13581023028243117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16329931618554522, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1379.5625, "completions/mean_terminated_length": 1324.8182373046875, "completions/min_length": 1228.0, "completions/min_terminated_length": 1228.0, "epoch": 0.40648129625925183, "frac_reward_zero_std": 0.0, "grad_norm": 2.782324058666112, "kl": 0.0131072998046875, "learning_rate": 7.664200746829393e-07, "loss": -0.0225, "num_tokens": 88624945.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0094621181488037, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1439212991656275, "rewards/wordcountpos_reward/raw_geo/std": 0.0869358797954705, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13770607453181927, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1353.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 1061.875, "completions/mean_terminated_length": 1061.875, "completions/min_length": 663.0, "completions/min_terminated_length": 663.0, "epoch": 0.40668133626725345, "frac_reward_zero_std": 0.0, "grad_norm": 2.758924741517208, "kl": 0.0139312744140625, "learning_rate": 7.661445193601227e-07, "loss": 0.0155, "num_tokens": 88658559.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9225866198539734, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03801201802497672, "rewards/wordcountpos_reward/raw_geo/std": 0.09938046293912063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 1078.75, "completions/mean_terminated_length": 1078.75, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.40688137627525506, "frac_reward_zero_std": 0.0, "grad_norm": 3.4951870684687822, "kl": 0.016143798828125, "learning_rate": 7.658688586442572e-07, "loss": -0.0185, "num_tokens": 88698395.0, "reward": -7.450580596923828e-09, "reward_std": 1.0503628253936768, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10119376379546063, "rewards/wordcountpos_reward/raw_geo/std": 0.05626616568481989, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1339.5, "completions/mean_terminated_length": 1179.0, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.40708141628325667, "frac_reward_zero_std": 0.0, "grad_norm": 2.5002544585043425, "kl": 0.011962890625, "learning_rate": 7.655930926697558e-07, "loss": -0.0286, "num_tokens": 88742283.0, "reward": 0.0, "reward_std": 0.995416522026062, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07154875748608237, "rewards/wordcountpos_reward/raw_geo/std": 0.10484524130850494, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1164.5, "completions/mean_terminated_length": 1052.666748046875, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.4072814562912582, "frac_reward_zero_std": 0.0, "grad_norm": 3.089861000251956, "kl": 0.0166168212890625, "learning_rate": 7.653172215710835e-07, "loss": -0.0216, "num_tokens": 88784091.0, "reward": 0.0, "reward_std": 1.0287597179412842, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05303123316805859, "rewards/wordcountpos_reward/raw_geo/std": 0.14431200922678045, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505421, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1054.9375, "completions/mean_terminated_length": 1054.9375, "completions/min_length": 681.0, "completions/min_terminated_length": 681.0, "epoch": 0.40748149629925984, "frac_reward_zero_std": 0.0, "grad_norm": 3.2330003934764617, "kl": 0.0161590576171875, "learning_rate": 7.65041245482756e-07, "loss": 0.0036, "num_tokens": 88822626.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8834084272384644, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1771450856793201, "rewards/wordcountpos_reward/raw_geo/std": 0.08420064658387864, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1110.375, "completions/mean_terminated_length": 1110.375, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.40768153630726145, "frac_reward_zero_std": 0.0, "grad_norm": 3.17644107161702, "kl": 0.0139617919921875, "learning_rate": 7.647651645393412e-07, "loss": -0.0452, "num_tokens": 88862320.0, "reward": 1.4901161193847656e-08, "reward_std": 0.970673680305481, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03456906638147079, "rewards/wordcountpos_reward/raw_geo/std": 0.03797650568480985, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1085.4375, "completions/mean_terminated_length": 1085.4375, "completions/min_length": 700.0, "completions/min_terminated_length": 700.0, "epoch": 0.40788157631526306, "frac_reward_zero_std": 0.0, "grad_norm": 2.545426819670408, "kl": 0.01318359375, "learning_rate": 7.644889788754565e-07, "loss": 0.0006, "num_tokens": 88905255.0, "reward": -3.725290298461914e-09, "reward_std": 1.0670288801193237, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1871330729637628, "rewards/wordcountpos_reward/raw_geo/std": 0.20611074142022165, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.20651428073658284, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1333.25, "completions/mean_terminated_length": 1294.769287109375, "completions/min_length": 1149.0, "completions/min_terminated_length": 1149.0, "epoch": 0.4080816163232647, "frac_reward_zero_std": 0.0, "grad_norm": 2.689368048228186, "kl": 0.014617919921875, "learning_rate": 7.642126886257718e-07, "loss": 0.0038, "num_tokens": 88947555.0, "reward": 0.0, "reward_std": 0.5388506054878235, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1033548942525688, "rewards/wordcountpos_reward/raw_geo/std": 0.1922912245546802, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1037.5625, "completions/mean_terminated_length": 1037.5625, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.40828165633126623, "frac_reward_zero_std": 0.0, "grad_norm": 2.9137011114260973, "kl": 0.015777587890625, "learning_rate": 7.639362939250076e-07, "loss": -0.0251, "num_tokens": 88989452.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9181332588195801, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.051140333796764643, "rewards/wordcountpos_reward/raw_geo/std": 0.0582240380851468, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1239.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 1005.3125, "completions/mean_terminated_length": 1005.3125, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.40848169633926784, "frac_reward_zero_std": 0.0, "grad_norm": 2.886785813691314, "kl": 0.015411376953125, "learning_rate": 7.636597949079349e-07, "loss": 0.0134, "num_tokens": 89021089.0, "reward": -2.9802322387695312e-08, "reward_std": 0.46501588821411133, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.050743404696746444, "rewards/wordcountpos_reward/raw_geo/std": 0.266148005408282, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316811, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1235.3125, "completions/mean_terminated_length": 1174.2308349609375, "completions/min_length": 974.0, "completions/min_terminated_length": 974.0, "epoch": 0.40868173634726945, "frac_reward_zero_std": 0.0, "grad_norm": 2.917314711779519, "kl": 0.0157470703125, "learning_rate": 7.633831917093759e-07, "loss": -0.0335, "num_tokens": 89062142.0, "reward": 0.0, "reward_std": 0.6006075143814087, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.00836710492061403, "rewards/wordcountpos_reward/raw_geo/std": 0.05968047302288801, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 973.9375, "completions/mean_terminated_length": 973.9375, "completions/min_length": 722.0, "completions/min_terminated_length": 722.0, "epoch": 0.40888177635527106, "frac_reward_zero_std": 0.0, "grad_norm": 3.8942760811244876, "kl": 0.02642822265625, "learning_rate": 7.631064844642038e-07, "loss": 0.0043, "num_tokens": 89095229.0, "reward": 0.0, "reward_std": 0.44809669256210327, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05653067637117895, "rewards/wordcountpos_reward/raw_geo/std": 0.08137999870137888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1128.0, "completions/max_terminated_length": 1128.0, "completions/mean_length": 1013.875, "completions/mean_terminated_length": 1013.875, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.4090818163632727, "frac_reward_zero_std": 0.0, "grad_norm": 2.8248150389240343, "kl": 0.013702392578125, "learning_rate": 7.628296733073423e-07, "loss": -0.0203, "num_tokens": 89135451.0, "reward": 0.0, "reward_std": 1.004736065864563, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03067343372932397, "rewards/wordcountpos_reward/raw_geo/std": 0.05617795184422883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1199.25, "completions/mean_terminated_length": 1156.2857666015625, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.40928185637127423, "frac_reward_zero_std": 0.0, "grad_norm": 2.128159173903565, "kl": 0.011474609375, "learning_rate": 7.625527583737655e-07, "loss": 0.071, "num_tokens": 89185567.0, "reward": 0.0, "reward_std": 0.566319465637207, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.038798853676351344, "rewards/wordcountpos_reward/raw_geo/std": 0.04820861544842434, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 962.6875, "completions/mean_terminated_length": 962.6875, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.40948189637927584, "frac_reward_zero_std": 0.0, "grad_norm": 3.763269064204829, "kl": 0.02105712890625, "learning_rate": 7.622757397984985e-07, "loss": -0.0126, "num_tokens": 89225882.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9923650026321411, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16031767452327955, "rewards/wordcountpos_reward/raw_geo/std": 0.0896534699541145, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1213.0, "completions/mean_terminated_length": 1213.0, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.40968193638727746, "frac_reward_zero_std": 0.0, "grad_norm": 3.1762308232329106, "kl": 0.019439697265625, "learning_rate": 7.61998617716617e-07, "loss": 0.0107, "num_tokens": 89276074.0, "reward": -9.313225746154785e-09, "reward_std": 1.0561612844467163, "rewards/wordcountpos_reward/mean": -9.313225746154785e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1296913380581479, "rewards/wordcountpos_reward/raw_geo/std": 0.11148589211398827, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1180.4375, "completions/mean_terminated_length": 1180.4375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.40988197639527907, "frac_reward_zero_std": 0.0, "grad_norm": 2.9182356454156104, "kl": 0.0137481689453125, "learning_rate": 7.617213922632467e-07, "loss": -0.0298, "num_tokens": 89325985.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8708808422088623, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04771130579046416, "rewards/wordcountpos_reward/raw_geo/std": 0.13394100207852241, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1111.875, "completions/mean_terminated_length": 1111.875, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.4100820164032807, "frac_reward_zero_std": 0.0, "grad_norm": 3.164204186474517, "kl": 0.0154266357421875, "learning_rate": 7.61444063573564e-07, "loss": 0.0351, "num_tokens": 89368495.0, "reward": 1.4901161193847656e-08, "reward_std": 1.044725775718689, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07867019600153423, "rewards/wordcountpos_reward/raw_geo/std": 0.06818803039306831, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2050 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1253.75, "completions/mean_terminated_length": 1237.3333740234375, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.41028205641128224, "frac_reward_zero_std": 0.0, "grad_norm": 3.036523508013887, "kl": 0.0133209228515625, "learning_rate": 7.611666317827961e-07, "loss": 0.009, "num_tokens": 89411019.0, "reward": 5.960464477539063e-08, "reward_std": 0.7230154275894165, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20641280355932198, "rewards/wordcountpos_reward/raw_geo/std": 0.15154443766626036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1207.125, "completions/mean_terminated_length": 1074.0, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.41048209641928385, "frac_reward_zero_std": 0.0, "grad_norm": 3.194906965312906, "kl": 0.0151519775390625, "learning_rate": 7.608890970262194e-07, "loss": -0.0334, "num_tokens": 89463021.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9793373346328735, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19304215050991594, "rewards/wordcountpos_reward/raw_geo/std": 0.14045923439566074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1305.25, "completions/mean_terminated_length": 1188.4000244140625, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.41068213642728546, "frac_reward_zero_std": 0.0, "grad_norm": 3.4130134389985316, "kl": 0.01953125, "learning_rate": 7.606114594391614e-07, "loss": 0.0429, "num_tokens": 89512881.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9639958739280701, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18308032544016586, "rewards/wordcountpos_reward/raw_geo/std": 0.17327321093521772, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1170.5, "completions/mean_terminated_length": 1094.4615478515625, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.41088217643528707, "frac_reward_zero_std": 0.0, "grad_norm": 3.501576734003423, "kl": 0.0205078125, "learning_rate": 7.603337191569995e-07, "loss": 0.0198, "num_tokens": 89565129.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8766465187072754, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15048604226117676, "rewards/wordcountpos_reward/raw_geo/std": 0.1318830278348216, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316813, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1115.4375, "completions/mean_terminated_length": 1089.800048828125, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.4110822164432887, "frac_reward_zero_std": 0.0, "grad_norm": 2.7013266127547038, "kl": 0.011871337890625, "learning_rate": 7.600558763151609e-07, "loss": -0.0234, "num_tokens": 89612792.0, "reward": 0.0, "reward_std": 0.9542516469955444, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025360884233926455, "rewards/wordcountpos_reward/raw_geo/std": 0.08969494356618637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1126.5, "completions/mean_terminated_length": 1126.5, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.41128225645129024, "frac_reward_zero_std": 0.0, "grad_norm": 3.0313822861550443, "kl": 0.0144500732421875, "learning_rate": 7.597779310491233e-07, "loss": -0.0094, "num_tokens": 89654240.0, "reward": 7.450580596923828e-09, "reward_std": 1.0250530242919922, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16178594466181528, "rewards/wordcountpos_reward/raw_geo/std": 0.06281511360393291, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1339.8125, "completions/mean_terminated_length": 1243.7000732421875, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.41148229645929185, "frac_reward_zero_std": 0.0, "grad_norm": 2.6363181380115677, "kl": 0.0130157470703125, "learning_rate": 7.594998834944139e-07, "loss": -0.0144, "num_tokens": 89703821.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7662883996963501, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07618260684247634, "rewards/wordcountpos_reward/raw_geo/std": 0.20384475137161223, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1040.5, "completions/mean_terminated_length": 1009.86669921875, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.41168233646729346, "frac_reward_zero_std": 0.0, "grad_norm": 3.634354417200075, "kl": 0.02099609375, "learning_rate": 7.592217337866099e-07, "loss": 0.0202, "num_tokens": 89749581.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9276562929153442, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10220173829743892, "rewards/wordcountpos_reward/raw_geo/std": 0.07072056711310329, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1125.4375, "completions/mean_terminated_length": 1125.4375, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.4118823764752951, "frac_reward_zero_std": 0.0, "grad_norm": 3.799994663178922, "kl": 0.021942138671875, "learning_rate": 7.589434820613385e-07, "loss": -0.0115, "num_tokens": 89797492.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9818932414054871, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.035810393339433054, "rewards/wordcountpos_reward/raw_geo/std": 0.08534932228070492, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1177.625, "completions/mean_terminated_length": 1177.625, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.41208241648329663, "frac_reward_zero_std": 0.0, "grad_norm": 3.406603934451574, "kl": 0.02069091796875, "learning_rate": 7.586651284542764e-07, "loss": -0.0145, "num_tokens": 89844326.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4696350395679474, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009232874008731518, "rewards/wordcountpos_reward/raw_geo/std": 0.09914829138801044, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1116.3125, "completions/mean_terminated_length": 1116.3125, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.41228245649129824, "frac_reward_zero_std": 0.0, "grad_norm": 2.8422803642651533, "kl": 0.0128936767578125, "learning_rate": 7.583866731011503e-07, "loss": -0.0112, "num_tokens": 89879891.0, "reward": 0.0, "reward_std": 0.8208340406417847, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.001930772935147407, "rewards/wordcountpos_reward/raw_geo/std": 0.07682183032810573, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1028.3125, "completions/mean_terminated_length": 996.86669921875, "completions/min_length": 765.0, "completions/min_terminated_length": 765.0, "epoch": 0.41248249649929986, "frac_reward_zero_std": 0.0, "grad_norm": 2.768109000132557, "kl": 0.011627197265625, "learning_rate": 7.58108116137736e-07, "loss": -0.0563, "num_tokens": 89919944.0, "reward": 0.0, "reward_std": 0.7818885445594788, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04149919408085247, "rewards/wordcountpos_reward/raw_geo/std": 0.09680058631779366, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1170.4375, "completions/mean_terminated_length": 1123.357177734375, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.41268253650730147, "frac_reward_zero_std": 0.0, "grad_norm": 3.5488807089181744, "kl": 0.01812744140625, "learning_rate": 7.578294576998594e-07, "loss": -0.0218, "num_tokens": 89964079.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9745845794677734, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08138645476230073, "rewards/wordcountpos_reward/raw_geo/std": 0.12812007706990433, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.12041594578792297, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1139.0625, "completions/mean_terminated_length": 1139.0625, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.4128825765153031, "frac_reward_zero_std": 0.0, "grad_norm": 3.2722204030994773, "kl": 0.017242431640625, "learning_rate": 7.575506979233953e-07, "loss": -0.0324, "num_tokens": 90005272.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8312982320785522, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.043079059438433, "rewards/wordcountpos_reward/raw_geo/std": 0.09665574101102234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1269.4375, "completions/mean_terminated_length": 1192.5833740234375, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.41308261652330464, "frac_reward_zero_std": 0.0, "grad_norm": 2.8631410046086234, "kl": 0.0160675048828125, "learning_rate": 7.572718369442687e-07, "loss": -0.0219, "num_tokens": 90050295.0, "reward": 0.0, "reward_std": 0.372414767742157, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08181829650758538, "rewards/wordcountpos_reward/raw_geo/std": 0.13593997492731055, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.14168300559373406, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1233.25, "completions/mean_terminated_length": 1171.6923828125, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.41328265653130625, "frac_reward_zero_std": 0.0, "grad_norm": 2.978865664675899, "kl": 0.013671875, "learning_rate": 7.569928748984528e-07, "loss": 0.0293, "num_tokens": 90103907.0, "reward": 0.0, "reward_std": 0.6105406880378723, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.26495903494321543, "rewards/wordcountpos_reward/raw_geo/std": 0.16023582947442397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1058.4375, "completions/mean_terminated_length": 1058.4375, "completions/min_length": 525.0, "completions/min_terminated_length": 525.0, "epoch": 0.41348269653930786, "frac_reward_zero_std": 0.0, "grad_norm": 3.3487245976576427, "kl": 0.0154266357421875, "learning_rate": 7.567138119219714e-07, "loss": -0.0666, "num_tokens": 90135618.0, "reward": 7.450580596923828e-09, "reward_std": 1.0354893207550049, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.13816507423818963, "rewards/wordcountpos_reward/raw_geo/std": 0.08789927065426092, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.15910630036178586, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1397.25, "completions/mean_terminated_length": 1317.3333740234375, "completions/min_length": 1207.0, "completions/min_terminated_length": 1207.0, "epoch": 0.41368273654730947, "frac_reward_zero_std": 0.0, "grad_norm": 2.6886323591229924, "kl": 0.0136260986328125, "learning_rate": 7.564346481508968e-07, "loss": 0.0076, "num_tokens": 90189862.0, "reward": 0.0, "reward_std": 0.5482354164123535, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17847203409595283, "rewards/wordcountpos_reward/raw_geo/std": 0.18905682168610427, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1218.1875, "completions/mean_terminated_length": 1153.1539306640625, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.4138827765553111, "frac_reward_zero_std": 0.0, "grad_norm": 2.5632002153930653, "kl": 0.01239013671875, "learning_rate": 7.561553837213501e-07, "loss": -0.0194, "num_tokens": 90224073.0, "reward": 0.0, "reward_std": 0.7337994575500488, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03758347407710905, "rewards/wordcountpos_reward/raw_geo/std": 0.10236958409243328, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1337.9375, "completions/mean_terminated_length": 1264.272705078125, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.41408281656331264, "frac_reward_zero_std": 0.0, "grad_norm": 2.4603539314854657, "kl": 0.013336181640625, "learning_rate": 7.558760187695024e-07, "loss": -0.005, "num_tokens": 90269128.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8532613515853882, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15741531884336663, "rewards/wordcountpos_reward/raw_geo/std": 0.0714625029877396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10602235962635778, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2070 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1178.4375, "completions/mean_terminated_length": 1178.4375, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.41428285657131425, "frac_reward_zero_std": 0.0, "grad_norm": 3.1907335864247566, "kl": 0.0168304443359375, "learning_rate": 7.555965534315728e-07, "loss": 0.0171, "num_tokens": 90306967.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9006006717681885, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013994510680231987, "rewards/wordcountpos_reward/raw_geo/std": 0.04523594850600485, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1179.375, "completions/mean_terminated_length": 1133.571533203125, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.41448289657931586, "frac_reward_zero_std": 0.0, "grad_norm": 2.795707271503921, "kl": 0.01190185546875, "learning_rate": 7.553169878438304e-07, "loss": -0.0192, "num_tokens": 90357277.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0277823209762573, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.044225348474259706, "rewards/wordcountpos_reward/raw_geo/std": 0.04543688629738194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1179.75, "completions/mean_terminated_length": 1158.4000244140625, "completions/min_length": 981.0, "completions/min_terminated_length": 981.0, "epoch": 0.4146829365873175, "frac_reward_zero_std": 0.0, "grad_norm": 3.1929262809920385, "kl": 0.0135345458984375, "learning_rate": 7.550373221425923e-07, "loss": -0.0545, "num_tokens": 90404297.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9724725484848022, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0034347289522071254, "rewards/wordcountpos_reward/raw_geo/std": 0.06987854243074156, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1114.0, "completions/mean_terminated_length": 1114.0, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.4148829765953191, "frac_reward_zero_std": 0.0, "grad_norm": 2.479275896891316, "kl": 0.012908935546875, "learning_rate": 7.547575564642248e-07, "loss": 0.0131, "num_tokens": 90454329.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9107528328895569, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07750057609836389, "rewards/wordcountpos_reward/raw_geo/std": 0.09219415217931887, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1125.625, "completions/mean_terminated_length": 1125.625, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.41508301660332064, "frac_reward_zero_std": 0.0, "grad_norm": 3.4919590359670747, "kl": 0.02532958984375, "learning_rate": 7.544776909451431e-07, "loss": -0.0114, "num_tokens": 90503939.0, "reward": -5.960464477539063e-08, "reward_std": 0.4249512553215027, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.025326373913481873, "rewards/wordcountpos_reward/raw_geo/std": 0.3514281334133568, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1214.0, "completions/max_terminated_length": 1214.0, "completions/mean_length": 1029.875, "completions/mean_terminated_length": 1029.875, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.41528305661132225, "frac_reward_zero_std": 0.0, "grad_norm": 3.7298016608331546, "kl": 0.01953125, "learning_rate": 7.541977257218108e-07, "loss": 0.0176, "num_tokens": 90541529.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0426719188690186, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3400510205162337, "rewards/wordcountpos_reward/raw_geo/std": 0.2716175605676903, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1191.75, "completions/mean_terminated_length": 1147.71435546875, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.41548309661932387, "frac_reward_zero_std": 0.0, "grad_norm": 3.4649779852669815, "kl": 0.018096923828125, "learning_rate": 7.5391766093074e-07, "loss": -0.0368, "num_tokens": 90592533.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9908446073532104, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19079838409134126, "rewards/wordcountpos_reward/raw_geo/std": 0.2869768172138338, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.22835969028738007, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2077 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1101.1875, "completions/mean_terminated_length": 1101.1875, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.4156831366273255, "frac_reward_zero_std": 0.0, "grad_norm": 3.6747845204549434, "kl": 0.024444580078125, "learning_rate": 7.53637496708492e-07, "loss": -0.0289, "num_tokens": 90639080.0, "reward": 0.0, "reward_std": 1.0571057796478271, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.041197205891221514, "rewards/wordcountpos_reward/raw_geo/std": 0.10386749530301603, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1159.625, "completions/mean_terminated_length": 1046.166748046875, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.4158831766353271, "frac_reward_zero_std": 0.0, "grad_norm": 3.371502689483141, "kl": 0.0194091796875, "learning_rate": 7.533572331916758e-07, "loss": -0.0277, "num_tokens": 90691626.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8431044220924377, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19477465929993776, "rewards/wordcountpos_reward/raw_geo/std": 0.24605395026168847, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1331.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1010.375, "completions/mean_terminated_length": 1010.375, "completions/min_length": 716.0, "completions/min_terminated_length": 716.0, "epoch": 0.41608321664332865, "frac_reward_zero_std": 0.0, "grad_norm": 3.4397107630440527, "kl": 0.015716552734375, "learning_rate": 7.530768705169492e-07, "loss": 0.0054, "num_tokens": 90737936.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9361487627029419, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01942549303046435, "rewards/wordcountpos_reward/raw_geo/std": 0.09736502101124805, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316811, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1241.625, "completions/mean_terminated_length": 1224.4000244140625, "completions/min_length": 1079.0, "completions/min_terminated_length": 1079.0, "epoch": 0.41628325665133026, "frac_reward_zero_std": 0.0, "grad_norm": 2.863190522150523, "kl": 0.015228271484375, "learning_rate": 7.527964088210185e-07, "loss": -0.022, "num_tokens": 90783338.0, "reward": 0.0, "reward_std": 0.5437297821044922, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.020743565133731386, "rewards/wordcountpos_reward/raw_geo/std": 0.3019135317612351, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1319.75, "completions/mean_terminated_length": 1294.0, "completions/min_length": 1112.0, "completions/min_terminated_length": 1112.0, "epoch": 0.41648329665933187, "frac_reward_zero_std": 0.0, "grad_norm": 2.4290680691815196, "kl": 0.0194549560546875, "learning_rate": 7.525158482406378e-07, "loss": -0.0062, "num_tokens": 90842830.0, "reward": 7.450580596923828e-09, "reward_std": 1.061673641204834, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.011732253423559286, "rewards/wordcountpos_reward/raw_geo/std": 0.15602785951111706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1368.125, "completions/mean_terminated_length": 1289.0, "completions/min_length": 1112.0, "completions/min_terminated_length": 1112.0, "epoch": 0.4166833366673335, "frac_reward_zero_std": 0.0, "grad_norm": 3.288213701834495, "kl": 0.01861572265625, "learning_rate": 7.522351889126102e-07, "loss": -0.0193, "num_tokens": 90883360.0, "reward": 0.0, "reward_std": 0.545283854007721, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07383398908649212, "rewards/wordcountpos_reward/raw_geo/std": 0.19546813815770966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1120.0625, "completions/mean_terminated_length": 1120.0625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.4168833766753351, "frac_reward_zero_std": 0.0, "grad_norm": 3.4956768113018724, "kl": 0.0206298828125, "learning_rate": 7.51954430973786e-07, "loss": -0.0367, "num_tokens": 90923553.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9575765132904053, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.058622720304457004, "rewards/wordcountpos_reward/raw_geo/std": 0.39406352527743715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 925.6875, "completions/mean_terminated_length": 925.6875, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.41708341668333665, "frac_reward_zero_std": 0.0, "grad_norm": 3.446405891985137, "kl": 0.0166015625, "learning_rate": 7.516735745610641e-07, "loss": -0.024, "num_tokens": 90953380.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9469360113143921, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01817722348221632, "rewards/wordcountpos_reward/raw_geo/std": 0.04299081115555371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1410.5625, "completions/mean_terminated_length": 1369.9091796875, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.41728345669133826, "frac_reward_zero_std": 0.0, "grad_norm": 3.210758994039939, "kl": 0.01715087890625, "learning_rate": 7.513926198113914e-07, "loss": -0.0314, "num_tokens": 91004781.0, "reward": 1.4901161193847656e-08, "reward_std": 0.987002968788147, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1092987159397148, "rewards/wordcountpos_reward/raw_geo/std": 0.14007728443363104, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 1121.25, "completions/mean_terminated_length": 1096.0, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.4174834966993399, "frac_reward_zero_std": 0.0, "grad_norm": 3.3975816688683764, "kl": 0.01837158203125, "learning_rate": 7.51111566861763e-07, "loss": -0.0307, "num_tokens": 91047569.0, "reward": 0.0, "reward_std": 0.8085219264030457, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0033324245136461747, "rewards/wordcountpos_reward/raw_geo/std": 0.12600505472459714, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1381.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1111.0, "completions/mean_terminated_length": 1111.0, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.4176835367073415, "frac_reward_zero_std": 0.0, "grad_norm": 3.1668830453060486, "kl": 0.015289306640625, "learning_rate": 7.508304158492213e-07, "loss": -0.0706, "num_tokens": 91090025.0, "reward": 0.0, "reward_std": 0.9704805612564087, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.004661345165028416, "rewards/wordcountpos_reward/raw_geo/std": 0.05958402491443301, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1192.1875, "completions/mean_terminated_length": 1192.1875, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.41788357671534304, "frac_reward_zero_std": 0.0, "grad_norm": 2.2158621071918447, "kl": 0.0099639892578125, "learning_rate": 7.505491669108569e-07, "loss": -0.0002, "num_tokens": 91130084.0, "reward": 2.9802322387695312e-08, "reward_std": 0.40500861406326294, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11247286672665173, "rewards/wordcountpos_reward/raw_geo/std": 0.12321445524977395, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1270.0, "completions/max_terminated_length": 1270.0, "completions/mean_length": 998.0, "completions/mean_terminated_length": 998.0, "completions/min_length": 695.0, "completions/min_terminated_length": 695.0, "epoch": 0.41808361672334465, "frac_reward_zero_std": 0.0, "grad_norm": 3.2796832732705377, "kl": 0.016448974609375, "learning_rate": 7.50267820183808e-07, "loss": -0.0105, "num_tokens": 91174460.0, "reward": 2.9802322387695312e-08, "reward_std": 1.010758638381958, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0444285709151329, "rewards/wordcountpos_reward/raw_geo/std": 0.06436371811092535, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2090 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1267.25, "completions/mean_terminated_length": 1234.0, "completions/min_length": 1074.0, "completions/min_terminated_length": 1074.0, "epoch": 0.41828365673134627, "frac_reward_zero_std": 0.0, "grad_norm": 2.9998455394845798, "kl": 0.01611328125, "learning_rate": 7.499863758052606e-07, "loss": -0.0156, "num_tokens": 91220440.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0279719829559326, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11911207955189734, "rewards/wordcountpos_reward/raw_geo/std": 0.06424228517799624, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1381.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1037.1875, "completions/mean_terminated_length": 1037.1875, "completions/min_length": 674.0, "completions/min_terminated_length": 674.0, "epoch": 0.4184836967393479, "frac_reward_zero_std": 0.0, "grad_norm": 3.567877835576157, "kl": 0.019073486328125, "learning_rate": 7.497048339124482e-07, "loss": -0.0565, "num_tokens": 91257323.0, "reward": -2.9802322387695312e-08, "reward_std": 0.69542396068573, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04858513199473057, "rewards/wordcountpos_reward/raw_geo/std": 0.20226970435639133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1328.0, "completions/max_terminated_length": 1328.0, "completions/mean_length": 1100.375, "completions/mean_terminated_length": 1100.375, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.4186837367473495, "frac_reward_zero_std": 0.0, "grad_norm": 2.9253643884832523, "kl": 0.015411376953125, "learning_rate": 7.494231946426519e-07, "loss": -0.054, "num_tokens": 91302521.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7786675691604614, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05062957902798822, "rewards/wordcountpos_reward/raw_geo/std": 0.06792296354811327, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1227.125, "completions/mean_terminated_length": 1227.125, "completions/min_length": 1006.0, "completions/min_terminated_length": 1006.0, "epoch": 0.41888377675535104, "frac_reward_zero_std": 0.0, "grad_norm": 3.1775355501142664, "kl": 0.0154571533203125, "learning_rate": 7.491414581332006e-07, "loss": -0.0218, "num_tokens": 91344403.0, "reward": 0.0, "reward_std": 0.7530436515808105, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01941710237647685, "rewards/wordcountpos_reward/raw_geo/std": 0.15423348119811264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1134.1875, "completions/mean_terminated_length": 1109.800048828125, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.41908381676335266, "frac_reward_zero_std": 0.0, "grad_norm": 3.0519358491835966, "kl": 0.020172119140625, "learning_rate": 7.488596245214697e-07, "loss": -0.103, "num_tokens": 91397462.0, "reward": 0.0, "reward_std": 0.949368417263031, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0775313643860014, "rewards/wordcountpos_reward/raw_geo/std": 0.08801193893100527, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 1069.1875, "completions/mean_terminated_length": 1069.1875, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.41928385677135427, "frac_reward_zero_std": 0.0, "grad_norm": 2.4995844556007727, "kl": 0.0123291015625, "learning_rate": 7.485776939448831e-07, "loss": -0.0072, "num_tokens": 91441361.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9480688571929932, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13104801257382598, "rewards/wordcountpos_reward/raw_geo/std": 0.14976798700733054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1092.0, "completions/max_terminated_length": 1092.0, "completions/mean_length": 881.375, "completions/mean_terminated_length": 881.375, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.4194838967793559, "frac_reward_zero_std": 0.0, "grad_norm": 3.0481566725015634, "kl": 0.012847900390625, "learning_rate": 7.482956665409112e-07, "loss": -0.0316, "num_tokens": 91473791.0, "reward": -7.450580596923828e-09, "reward_std": 1.016474962234497, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10161582047525834, "rewards/wordcountpos_reward/raw_geo/std": 0.07074008888748137, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0787635937708768, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1315.4375, "completions/mean_terminated_length": 1171.888916015625, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.4196839367873575, "frac_reward_zero_std": 0.0, "grad_norm": 3.0007038556477656, "kl": 0.0157928466796875, "learning_rate": 7.480135424470717e-07, "loss": 0.0505, "num_tokens": 91519070.0, "reward": 0.0, "reward_std": 0.6106951832771301, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08955058690465117, "rewards/wordcountpos_reward/raw_geo/std": 0.24551011220863236, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1157.0, "completions/max_terminated_length": 1157.0, "completions/mean_length": 896.3125, "completions/mean_terminated_length": 896.3125, "completions/min_length": 700.0, "completions/min_terminated_length": 700.0, "epoch": 0.41988397679535905, "frac_reward_zero_std": 0.0, "grad_norm": 2.3693378302989974, "kl": 0.0071563720703125, "learning_rate": 7.477313218009298e-07, "loss": 0.0008, "num_tokens": 91558411.0, "reward": 0.0, "reward_std": 0.9191321730613708, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.028245000228038795, "rewards/wordcountpos_reward/raw_geo/std": 0.13156894185448953, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1328.25, "completions/mean_terminated_length": 1194.6666259765625, "completions/min_length": 1107.0, "completions/min_terminated_length": 1107.0, "epoch": 0.42008401680336066, "frac_reward_zero_std": 0.0, "grad_norm": 3.2918027764638613, "kl": 0.015838623046875, "learning_rate": 7.474490047400974e-07, "loss": 0.0134, "num_tokens": 91615983.0, "reward": -4.470348358154297e-08, "reward_std": 0.9341504573822021, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11534054034153396, "rewards/wordcountpos_reward/raw_geo/std": 0.12813777647152896, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1128.0, "completions/max_terminated_length": 1128.0, "completions/mean_length": 996.75, "completions/mean_terminated_length": 996.75, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.42028405681136227, "frac_reward_zero_std": 0.0, "grad_norm": 3.9427661434646226, "kl": 0.024444580078125, "learning_rate": 7.47166591402234e-07, "loss": -0.0078, "num_tokens": 91657587.0, "reward": 0.0, "reward_std": 1.013602614402771, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06922052166624437, "rewards/wordcountpos_reward/raw_geo/std": 0.055285234301498266, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1219.25, "completions/mean_terminated_length": 1219.25, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.4204840968193639, "frac_reward_zero_std": 0.0, "grad_norm": 3.0864747483885706, "kl": 0.017791748046875, "learning_rate": 7.468840819250452e-07, "loss": -0.0122, "num_tokens": 91706607.0, "reward": -3.725290298461914e-09, "reward_std": 1.0228955745697021, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1152985676495635, "rewards/wordcountpos_reward/raw_geo/std": 0.1887500256543752, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1287403358472941, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1441.125, "completions/mean_terminated_length": 1365.4285888671875, "completions/min_length": 1190.0, "completions/min_terminated_length": 1190.0, "epoch": 0.4206841368273655, "frac_reward_zero_std": 0.0, "grad_norm": 3.0705984391190007, "kl": 0.01568603515625, "learning_rate": 7.46601476446284e-07, "loss": 0.0072, "num_tokens": 91756817.0, "reward": 0.0, "reward_std": 0.9790076613426208, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03394485995890771, "rewards/wordcountpos_reward/raw_geo/std": 0.22049671935689555, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1103.25, "completions/mean_terminated_length": 1103.25, "completions/min_length": 806.0, "completions/min_terminated_length": 806.0, "epoch": 0.42088417683536705, "frac_reward_zero_std": 0.0, "grad_norm": 3.2831901197201434, "kl": 0.0155792236328125, "learning_rate": 7.4631877510375e-07, "loss": 0.0036, "num_tokens": 91801629.0, "reward": 0.0, "reward_std": 0.9568842649459839, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07334292309432021, "rewards/wordcountpos_reward/raw_geo/std": 0.12388169283218321, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1230.9375, "completions/mean_terminated_length": 1108.6363525390625, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.42108421684336866, "frac_reward_zero_std": 0.0, "grad_norm": 3.1898968760174955, "kl": 0.0147247314453125, "learning_rate": 7.460359780352899e-07, "loss": -0.0243, "num_tokens": 91848396.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9036942720413208, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05656258864493367, "rewards/wordcountpos_reward/raw_geo/std": 0.07248217835395042, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.6583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.21343747458109497, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 984.5625, "completions/mean_terminated_length": 984.5625, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.4212842568513703, "frac_reward_zero_std": 0.0, "grad_norm": 2.0698436770530693, "kl": 0.00830078125, "learning_rate": 7.457530853787968e-07, "loss": -0.0024, "num_tokens": 91887077.0, "reward": 0.0, "reward_std": 0.9309990406036377, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.017744026092094967, "rewards/wordcountpos_reward/raw_geo/std": 0.12859092277396178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1067.4375, "completions/mean_terminated_length": 1005.6428833007812, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.4214842968593719, "frac_reward_zero_std": 0.0, "grad_norm": 3.1880328316050837, "kl": 0.014923095703125, "learning_rate": 7.454700972722102e-07, "loss": -0.0448, "num_tokens": 91933324.0, "reward": -5.960464477539063e-08, "reward_std": 0.982169508934021, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.060741053216613904, "rewards/wordcountpos_reward/raw_geo/std": 0.04227630795646526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1090.5, "completions/mean_terminated_length": 1063.2000732421875, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.4216843368673735, "frac_reward_zero_std": 0.0, "grad_norm": 2.5574930769301747, "kl": 0.011737823486328125, "learning_rate": 7.451870138535166e-07, "loss": -0.0324, "num_tokens": 91970972.0, "reward": 0.0, "reward_std": 0.8463442921638489, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10194253160546154, "rewards/wordcountpos_reward/raw_geo/std": 0.20210181124980264, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1141.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 806.5625, "completions/mean_terminated_length": 806.5625, "completions/min_length": 551.0, "completions/min_terminated_length": 551.0, "epoch": 0.42188437687537506, "frac_reward_zero_std": 0.0, "grad_norm": 3.784901416370076, "kl": 0.0189056396484375, "learning_rate": 7.449038352607488e-07, "loss": -0.0683, "num_tokens": 91994829.0, "reward": 1.30385160446167e-08, "reward_std": 1.0559343099594116, "rewards/wordcountpos_reward/mean": 1.30385160446167e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.011166725609109952, "rewards/wordcountpos_reward/raw_geo/std": 0.040449103341952175, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.1567612007930345, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1012.625, "completions/mean_terminated_length": 1012.625, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.42208441688337667, "frac_reward_zero_std": 0.0, "grad_norm": 2.6817927225013785, "kl": 0.013580322265625, "learning_rate": 7.44620561631986e-07, "loss": 0.0006, "num_tokens": 92025879.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0289065837860107, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10180578145207173, "rewards/wordcountpos_reward/raw_geo/std": 0.1375884599411024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1146.0, "completions/mean_terminated_length": 1122.4000244140625, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.4222844568913783, "frac_reward_zero_std": 0.0, "grad_norm": 3.571083853955385, "kl": 0.0213623046875, "learning_rate": 7.443371931053535e-07, "loss": -0.0421, "num_tokens": 92077399.0, "reward": -7.450580596923828e-09, "reward_std": 1.0656661987304688, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.022847170960027627, "rewards/wordcountpos_reward/raw_geo/std": 0.08072001861568547, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1109.1875, "completions/mean_terminated_length": 1083.1334228515625, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.4224844968993799, "frac_reward_zero_std": 0.0, "grad_norm": 3.2431338735518906, "kl": 0.021240234375, "learning_rate": 7.44053729819023e-07, "loss": -0.0292, "num_tokens": 92124690.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9944485425949097, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.25791616242572957, "rewards/wordcountpos_reward/raw_geo/std": 0.12096965727606981, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1195.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 1025.125, "completions/mean_terminated_length": 1025.125, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.4226845369073815, "frac_reward_zero_std": 0.0, "grad_norm": 2.7541300381633613, "kl": 0.015777587890625, "learning_rate": 7.437701719112128e-07, "loss": 0.0342, "num_tokens": 92160652.0, "reward": -7.450580596923828e-09, "reward_std": 1.0448180437088013, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.03875125946746098, "rewards/wordcountpos_reward/raw_geo/std": 0.08171747478572673, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1155.4375, "completions/mean_terminated_length": 1155.4375, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.42288457691538306, "frac_reward_zero_std": 0.0, "grad_norm": 3.221857294526419, "kl": 0.0171051025390625, "learning_rate": 7.434865195201869e-07, "loss": -0.0052, "num_tokens": 92199355.0, "reward": 0.0, "reward_std": 1.0028592348098755, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04899455780819799, "rewards/wordcountpos_reward/raw_geo/std": 0.031059352004889382, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1252.375, "completions/mean_terminated_length": 1139.8182373046875, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.42308461692338467, "frac_reward_zero_std": 0.0, "grad_norm": 3.2300397698081262, "kl": 0.0171661376953125, "learning_rate": 7.432027727842555e-07, "loss": -0.0292, "num_tokens": 92246329.0, "reward": -1.4901161193847656e-08, "reward_std": 0.97001051902771, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03326791010423786, "rewards/wordcountpos_reward/raw_geo/std": 0.14317097797242592, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1333.1875, "completions/mean_terminated_length": 1203.4444580078125, "completions/min_length": 1032.0, "completions/min_terminated_length": 1032.0, "epoch": 0.4232846569313863, "frac_reward_zero_std": 0.0, "grad_norm": 3.1611602587489642, "kl": 0.015472412109375, "learning_rate": 7.429189318417746e-07, "loss": 0.0052, "num_tokens": 92296492.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9514833688735962, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21373388837332893, "rewards/wordcountpos_reward/raw_geo/std": 0.12174831557800625, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027816, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 1105.8125, "completions/mean_terminated_length": 1105.8125, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.4234846969393879, "frac_reward_zero_std": 0.0, "grad_norm": 3.5267179254989816, "kl": 0.017822265625, "learning_rate": 7.426349968311468e-07, "loss": -0.0156, "num_tokens": 92347409.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0239317417144775, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.25862958044991186, "rewards/wordcountpos_reward/raw_geo/std": 0.25403812706836704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1063.6875, "completions/mean_terminated_length": 1034.60009765625, "completions/min_length": 762.0, "completions/min_terminated_length": 762.0, "epoch": 0.42368473694738945, "frac_reward_zero_std": 0.0, "grad_norm": 3.3661390225094507, "kl": 0.018890380859375, "learning_rate": 7.423509678908197e-07, "loss": -0.0051, "num_tokens": 92385708.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9235712289810181, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07560614490515084, "rewards/wordcountpos_reward/raw_geo/std": 0.10583007961241211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1297.875, "completions/mean_terminated_length": 1269.0, "completions/min_length": 1096.0, "completions/min_terminated_length": 1096.0, "epoch": 0.42388477695539106, "frac_reward_zero_std": 0.0, "grad_norm": 2.9865159092024234, "kl": 0.015655517578125, "learning_rate": 7.420668451592869e-07, "loss": -0.0077, "num_tokens": 92431690.0, "reward": 0.0, "reward_std": 0.789594829082489, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.25656617977992063, "rewards/wordcountpos_reward/raw_geo/std": 0.22852696395318764, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1400.1875, "completions/mean_terminated_length": 1300.375, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.4240848169633927, "frac_reward_zero_std": 0.0, "grad_norm": 2.779624827336919, "kl": 0.0148773193359375, "learning_rate": 7.417826287750885e-07, "loss": -0.0074, "num_tokens": 92483061.0, "reward": 0.0, "reward_std": 1.0267531871795654, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06342242955296158, "rewards/wordcountpos_reward/raw_geo/std": 0.04490086795074983, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869924, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1060.6875, "completions/mean_terminated_length": 1060.6875, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.4242848569713943, "frac_reward_zero_std": 0.0, "grad_norm": 1.3762277279362907, "kl": 0.005512237548828125, "learning_rate": 7.414983188768096e-07, "loss": 0.0084, "num_tokens": 92521576.0, "reward": -3.725290298461914e-08, "reward_std": 1.0431268215179443, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022898235947164985, "rewards/wordcountpos_reward/raw_geo/std": 0.06608229534916647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1088.25, "completions/mean_terminated_length": 1088.25, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.4244848969793959, "frac_reward_zero_std": 0.0, "grad_norm": 2.304297925635469, "kl": 0.0107879638671875, "learning_rate": 7.412139156030805e-07, "loss": -0.0142, "num_tokens": 92564068.0, "reward": -7.450580596923828e-09, "reward_std": 1.024080753326416, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1231282080283537, "rewards/wordcountpos_reward/raw_geo/std": 0.2471292860995408, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 997.6875, "completions/mean_terminated_length": 997.6875, "completions/min_length": 593.0, "completions/min_terminated_length": 593.0, "epoch": 0.42468493698739745, "frac_reward_zero_std": 0.0, "grad_norm": 3.6615454070730253, "kl": 0.0180816650390625, "learning_rate": 7.409294190925782e-07, "loss": -0.0196, "num_tokens": 92594839.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0589087009429932, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0036771719896750822, "rewards/wordcountpos_reward/raw_geo/std": 0.14173352014988091, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12641788434189793, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1173.4375, "completions/mean_terminated_length": 1173.4375, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.42488497699539907, "frac_reward_zero_std": 0.0, "grad_norm": 2.7859803084954415, "kl": 0.01324462890625, "learning_rate": 7.40644829484024e-07, "loss": -0.0247, "num_tokens": 92632158.0, "reward": -7.450580596923828e-09, "reward_std": 1.0415492057800293, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.05057861281172059, "rewards/wordcountpos_reward/raw_geo/std": 0.13555374575762463, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1194.9375, "completions/mean_terminated_length": 1093.25, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.4250850170034007, "frac_reward_zero_std": 0.0, "grad_norm": 3.268630673410806, "kl": 0.0167083740234375, "learning_rate": 7.403601469161853e-07, "loss": -0.0112, "num_tokens": 92686325.0, "reward": -7.450580596923828e-09, "reward_std": 0.8918318748474121, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.09715566491318618, "rewards/wordcountpos_reward/raw_geo/std": 0.20044858561363674, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1287.3125, "completions/mean_terminated_length": 1190.6363525390625, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.4252850570114023, "frac_reward_zero_std": 0.0, "grad_norm": 3.2094580630991674, "kl": 0.019500732421875, "learning_rate": 7.400753715278745e-07, "loss": -0.0121, "num_tokens": 92728210.0, "reward": 0.0, "reward_std": 0.7152417898178101, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2699887823746627, "rewards/wordcountpos_reward/raw_geo/std": 0.2695910093363396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 1015.8125, "completions/mean_terminated_length": 1015.8125, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.4254850970194039, "frac_reward_zero_std": 0.0, "grad_norm": 2.6189722691691366, "kl": 0.010345458984375, "learning_rate": 7.397905034579494e-07, "loss": -0.0717, "num_tokens": 92759871.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9207649230957031, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06702639419193329, "rewards/wordcountpos_reward/raw_geo/std": 0.057876271996628675, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1038.6875, "completions/mean_terminated_length": 1038.6875, "completions/min_length": 574.0, "completions/min_terminated_length": 574.0, "epoch": 0.42568513702740546, "frac_reward_zero_std": 0.0, "grad_norm": 3.3022605769444735, "kl": 0.014739990234375, "learning_rate": 7.395055428453131e-07, "loss": -0.0043, "num_tokens": 92810074.0, "reward": -7.450580596923828e-09, "reward_std": 1.0503129959106445, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.024865092498550168, "rewards/wordcountpos_reward/raw_geo/std": 0.09756323363034665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619461, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1211.5625, "completions/mean_terminated_length": 1211.5625, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.42588517703540707, "frac_reward_zero_std": 0.0, "grad_norm": 2.667628962156022, "kl": 0.0118408203125, "learning_rate": 7.392204898289134e-07, "loss": -0.0203, "num_tokens": 92849371.0, "reward": -1.862645149230957e-08, "reward_std": 0.9325770139694214, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07107859620042047, "rewards/wordcountpos_reward/raw_geo/std": 0.04491260352728652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1181.8125, "completions/mean_terminated_length": 1136.357177734375, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.4260852170434087, "frac_reward_zero_std": 0.0, "grad_norm": 3.3003861935650356, "kl": 0.026885986328125, "learning_rate": 7.389353445477438e-07, "loss": -0.0318, "num_tokens": 92894392.0, "reward": 0.0, "reward_std": 0.8808838725090027, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12988125583969076, "rewards/wordcountpos_reward/raw_geo/std": 0.0903182320319016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1165.1875, "completions/mean_terminated_length": 1165.1875, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.4262852570514103, "frac_reward_zero_std": 0.0, "grad_norm": 2.890331334742341, "kl": 0.014404296875, "learning_rate": 7.386501071408421e-07, "loss": -0.0317, "num_tokens": 92936867.0, "reward": 0.0, "reward_std": 1.0450416803359985, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.00010713366187898362, "rewards/wordcountpos_reward/raw_geo/std": 0.056218926004073, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 1167.875, "completions/mean_terminated_length": 909.5555419921875, "completions/min_length": 511.0, "completions/min_terminated_length": 511.0, "epoch": 0.4264852970594119, "frac_reward_zero_std": 0.0, "grad_norm": 2.3664856743950367, "kl": 0.0171661376953125, "learning_rate": 7.383647777472918e-07, "loss": -0.0165, "num_tokens": 92972217.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8657695651054382, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07118739156503408, "rewards/wordcountpos_reward/raw_geo/std": 0.1942629707543221, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1087.3125, "completions/mean_terminated_length": 1028.357177734375, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.42668533706741346, "frac_reward_zero_std": 0.0, "grad_norm": 3.1677011746090913, "kl": 0.0157012939453125, "learning_rate": 7.380793565062202e-07, "loss": 0.0162, "num_tokens": 93018862.0, "reward": 0.0, "reward_std": 0.9480376243591309, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2056158501151542, "rewards/wordcountpos_reward/raw_geo/std": 0.16300356198882654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1093.5, "completions/mean_terminated_length": 1093.5, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.4268853770754151, "frac_reward_zero_std": 0.0, "grad_norm": 3.5348154407317427, "kl": 0.01910400390625, "learning_rate": 7.377938435568006e-07, "loss": -0.0003, "num_tokens": 93061318.0, "reward": 0.0, "reward_std": 1.0517101287841797, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13623521088706908, "rewards/wordcountpos_reward/raw_geo/std": 0.12053135680472313, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 1040.8125, "completions/mean_terminated_length": 1040.8125, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.4270854170834167, "frac_reward_zero_std": 0.0, "grad_norm": 3.2074126259343294, "kl": 0.01519775390625, "learning_rate": 7.375082390382498e-07, "loss": 0.012, "num_tokens": 93101299.0, "reward": 7.450580596923828e-09, "reward_std": 1.019124984741211, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.04698691608835712, "rewards/wordcountpos_reward/raw_geo/std": 0.11327493887519208, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1439.25, "completions/mean_terminated_length": 1257.0, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.4272854570914183, "frac_reward_zero_std": 0.0, "grad_norm": 3.073605310934566, "kl": 0.018524169921875, "learning_rate": 7.372225430898303e-07, "loss": 0.0161, "num_tokens": 93160159.0, "reward": 0.0, "reward_std": 0.8811906576156616, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1703712665647365, "rewards/wordcountpos_reward/raw_geo/std": 0.10200929903482862, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1215.4375, "completions/mean_terminated_length": 1196.4666748046875, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.4274854970994199, "frac_reward_zero_std": 0.0, "grad_norm": 3.4302196678813224, "kl": 0.019683837890625, "learning_rate": 7.36936755850849e-07, "loss": -0.0157, "num_tokens": 93206766.0, "reward": 0.0, "reward_std": 0.8466488718986511, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09526268177112367, "rewards/wordcountpos_reward/raw_geo/std": 0.09691484073455478, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1378.75, "completions/mean_terminated_length": 1284.4444580078125, "completions/min_length": 1089.0, "completions/min_terminated_length": 1089.0, "epoch": 0.42768553710742147, "frac_reward_zero_std": 0.0, "grad_norm": 2.6556181492522826, "kl": 0.0130615234375, "learning_rate": 7.366508774606562e-07, "loss": -0.0013, "num_tokens": 93249834.0, "reward": 7.450580596923828e-09, "reward_std": 1.0441550016403198, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.011184607465928288, "rewards/wordcountpos_reward/raw_geo/std": 0.07879209806825507, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1255.5625, "completions/mean_terminated_length": 1220.6429443359375, "completions/min_length": 1017.0, "completions/min_terminated_length": 1017.0, "epoch": 0.4278855771154231, "frac_reward_zero_std": 0.0, "grad_norm": 3.0928409656154727, "kl": 0.016754150390625, "learning_rate": 7.363649080586483e-07, "loss": 0.0343, "num_tokens": 93296955.0, "reward": 0.0, "reward_std": 0.3523564636707306, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.021427714377580565, "rewards/wordcountpos_reward/raw_geo/std": 0.16992102366565548, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1415.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1145.6875, "completions/mean_terminated_length": 1145.6875, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.4280856171234247, "frac_reward_zero_std": 0.0, "grad_norm": 3.7976292872185073, "kl": 0.022613525390625, "learning_rate": 7.360788477842648e-07, "loss": -0.0079, "num_tokens": 93340822.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9339010715484619, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10288298645662364, "rewards/wordcountpos_reward/raw_geo/std": 0.13943343527328206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1201.1875, "completions/mean_terminated_length": 1181.2667236328125, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.4282856571314263, "frac_reward_zero_std": 0.0, "grad_norm": 3.1129219929203895, "kl": 0.017974853515625, "learning_rate": 7.357926967769902e-07, "loss": -0.0072, "num_tokens": 93376657.0, "reward": 0.0, "reward_std": 0.6890926957130432, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0023664901699822012, "rewards/wordcountpos_reward/raw_geo/std": 0.015545208519344203, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1447.1875, "completions/mean_terminated_length": 1359.166748046875, "completions/min_length": 1233.0, "completions/min_terminated_length": 1233.0, "epoch": 0.4284856971394279, "frac_reward_zero_std": 0.0, "grad_norm": 2.54161628198496, "kl": 0.0142364501953125, "learning_rate": 7.355064551763529e-07, "loss": -0.0136, "num_tokens": 93434596.0, "reward": -3.725290298461914e-09, "reward_std": 1.0219638347625732, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1273771832198033, "rewards/wordcountpos_reward/raw_geo/std": 0.09391625310137386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1081.0625, "completions/mean_terminated_length": 1053.1334228515625, "completions/min_length": 720.0, "completions/min_terminated_length": 720.0, "epoch": 0.42868573714742947, "frac_reward_zero_std": 0.0, "grad_norm": 3.091032957855706, "kl": 0.0179901123046875, "learning_rate": 7.352201231219259e-07, "loss": -0.0455, "num_tokens": 93470893.0, "reward": 1.4901161193847656e-08, "reward_std": 1.06145441532135, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.22096655212717714, "rewards/wordcountpos_reward/raw_geo/std": 0.10127304783244871, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12464765155042849, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1215.75, "completions/mean_terminated_length": 1196.800048828125, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.4288857771554311, "frac_reward_zero_std": 0.0, "grad_norm": 2.89854014762766, "kl": 0.01396942138671875, "learning_rate": 7.349337007533255e-07, "loss": 0.0292, "num_tokens": 93518105.0, "reward": 0.0, "reward_std": 0.8543663024902344, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09900604685344672, "rewards/wordcountpos_reward/raw_geo/std": 0.11311657445443103, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181315, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1313.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1079.75, "completions/mean_terminated_length": 1079.75, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.4290858171634327, "frac_reward_zero_std": 0.0, "grad_norm": 3.4544910276791434, "kl": 0.0190277099609375, "learning_rate": 7.346471882102131e-07, "loss": 0.0076, "num_tokens": 93560357.0, "reward": 7.450580596923828e-09, "reward_std": 1.0530225038528442, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.02324477716121821, "rewards/wordcountpos_reward/raw_geo/std": 0.060503749673239966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1214.0, "completions/mean_length": 1010.5625, "completions/mean_terminated_length": 977.9334106445312, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.4292858571714343, "frac_reward_zero_std": 0.0, "grad_norm": 3.526797345709653, "kl": 0.0174560546875, "learning_rate": 7.343605856322932e-07, "loss": 0.0032, "num_tokens": 93598894.0, "reward": 0.0, "reward_std": 1.0582648515701294, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1519884053377457, "rewards/wordcountpos_reward/raw_geo/std": 0.07192882587566005, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1170.5625, "completions/mean_terminated_length": 1170.5625, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.42948589717943586, "frac_reward_zero_std": 0.0, "grad_norm": 2.4841034666245476, "kl": 0.0119781494140625, "learning_rate": 7.340738931593146e-07, "loss": -0.0403, "num_tokens": 93642647.0, "reward": 0.0, "reward_std": 0.8921295404434204, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12481630481741353, "rewards/wordcountpos_reward/raw_geo/std": 0.08619827664364133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 950.9375, "completions/mean_terminated_length": 950.9375, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.42968593718743747, "frac_reward_zero_std": 0.0, "grad_norm": 3.724661030737698, "kl": 0.019073486328125, "learning_rate": 7.337871109310697e-07, "loss": 0.0218, "num_tokens": 93678774.0, "reward": 0.0, "reward_std": 0.8784717321395874, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05909827530501944, "rewards/wordcountpos_reward/raw_geo/std": 0.04526722615267514, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965646, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1162.6875, "completions/mean_terminated_length": 1114.5, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.4298859771954391, "frac_reward_zero_std": 0.0, "grad_norm": 3.3588106342756427, "kl": 0.01751708984375, "learning_rate": 7.33500239087395e-07, "loss": 0.0418, "num_tokens": 93720081.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0083593130111694, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03895372613073129, "rewards/wordcountpos_reward/raw_geo/std": 0.12101072247097232, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1231.5, "completions/mean_terminated_length": 1213.60009765625, "completions/min_length": 1035.0, "completions/min_terminated_length": 1035.0, "epoch": 0.4300860172034407, "frac_reward_zero_std": 0.0, "grad_norm": 3.0326207380509977, "kl": 0.0184326171875, "learning_rate": 7.332132777681706e-07, "loss": -0.0097, "num_tokens": 93771369.0, "reward": -2.9802322387695312e-08, "reward_std": 0.607256293296814, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07954872736745766, "rewards/wordcountpos_reward/raw_geo/std": 0.21401068726531622, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1289.125, "completions/mean_terminated_length": 1259.0, "completions/min_length": 1107.0, "completions/min_terminated_length": 1107.0, "epoch": 0.4302860572114423, "frac_reward_zero_std": 0.0, "grad_norm": 3.1142078821091186, "kl": 0.01763916015625, "learning_rate": 7.329262271133198e-07, "loss": -0.0429, "num_tokens": 93823915.0, "reward": 0.0, "reward_std": 0.6093506813049316, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2259015681347038, "rewards/wordcountpos_reward/raw_geo/std": 0.1194908982189704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1276569477008451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1273.75, "completions/mean_terminated_length": 1258.666748046875, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.43048609721944386, "frac_reward_zero_std": 0.0, "grad_norm": 2.3193892416648776, "kl": 0.0107879638671875, "learning_rate": 7.326390872628102e-07, "loss": -0.0262, "num_tokens": 93871343.0, "reward": 0.0, "reward_std": 0.5609322786331177, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02853177124143106, "rewards/wordcountpos_reward/raw_geo/std": 0.1260168658606449, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1252.0, "completions/mean_terminated_length": 1216.571533203125, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.4306861372274455, "frac_reward_zero_std": 0.0, "grad_norm": 3.259653291819354, "kl": 0.01739501953125, "learning_rate": 7.323518583566521e-07, "loss": 0.0603, "num_tokens": 93926423.0, "reward": -5.960464477539063e-08, "reward_std": 0.45667293667793274, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.029977440059928907, "rewards/wordcountpos_reward/raw_geo/std": 0.46407024841511413, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.21460558137093164, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1168.8125, "completions/mean_terminated_length": 1168.8125, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.4308861772354471, "frac_reward_zero_std": 0.0, "grad_norm": 3.2886704471465618, "kl": 0.019561767578125, "learning_rate": 7.320645405349001e-07, "loss": -0.0174, "num_tokens": 93978380.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0365486145019531, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10105111094867072, "rewards/wordcountpos_reward/raw_geo/std": 0.08363337683014345, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1355.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 950.75, "completions/mean_terminated_length": 950.75, "completions/min_length": 670.0, "completions/min_terminated_length": 670.0, "epoch": 0.4310862172434487, "frac_reward_zero_std": 0.0, "grad_norm": 3.184763087958931, "kl": 0.016693115234375, "learning_rate": 7.317771339376514e-07, "loss": 0.0061, "num_tokens": 94019608.0, "reward": 0.0, "reward_std": 0.7347003817558289, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10433863647797192, "rewards/wordcountpos_reward/raw_geo/std": 0.2506956566582781, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1323.6875, "completions/mean_terminated_length": 1298.5, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.4312862572514503, "frac_reward_zero_std": 0.0, "grad_norm": 3.0809044443439144, "kl": 0.0177001953125, "learning_rate": 7.314896387050464e-07, "loss": -0.012, "num_tokens": 94075923.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0512480735778809, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09361275022173515, "rewards/wordcountpos_reward/raw_geo/std": 0.08136049104415535, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.19398358082484618, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 1047.3125, "completions/mean_terminated_length": 1017.1333618164062, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.43148629725945187, "frac_reward_zero_std": 0.0, "grad_norm": 3.5315287562431377, "kl": 0.01947021484375, "learning_rate": 7.312020549772697e-07, "loss": -0.0175, "num_tokens": 94105208.0, "reward": 0.0, "reward_std": 1.0435247421264648, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.011154588679744094, "rewards/wordcountpos_reward/raw_geo/std": 0.11938244761175347, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1173.0, "completions/max_terminated_length": 1173.0, "completions/mean_length": 801.25, "completions/mean_terminated_length": 801.25, "completions/min_length": 510.0, "completions/min_terminated_length": 510.0, "epoch": 0.4316863372674535, "frac_reward_zero_std": 0.0, "grad_norm": 3.310313780490277, "kl": 0.0143280029296875, "learning_rate": 7.309143828945483e-07, "loss": 0.0187, "num_tokens": 94130364.0, "reward": 0.0, "reward_std": 0.8447370529174805, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06995682856623041, "rewards/wordcountpos_reward/raw_geo/std": 0.20105758176274105, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1226.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1099.625, "completions/mean_terminated_length": 1099.625, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.4318863772754551, "frac_reward_zero_std": 0.0, "grad_norm": 3.268363023950571, "kl": 0.02593994140625, "learning_rate": 7.306266225971519e-07, "loss": -0.0271, "num_tokens": 94181270.0, "reward": 0.0, "reward_std": 0.6817866563796997, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2048717421308062, "rewards/wordcountpos_reward/raw_geo/std": 0.16052271414405195, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1209.75, "completions/mean_terminated_length": 1168.2857666015625, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.4320864172834567, "frac_reward_zero_std": 0.0, "grad_norm": 2.9672377288499048, "kl": 0.0160369873046875, "learning_rate": 7.303387742253944e-07, "loss": 0.0124, "num_tokens": 94218242.0, "reward": 0.0, "reward_std": 0.6419894695281982, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.043243210506402015, "rewards/wordcountpos_reward/raw_geo/std": 0.0692859897126928, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1276.5, "completions/mean_terminated_length": 1224.923095703125, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.4322864572914583, "frac_reward_zero_std": 0.0, "grad_norm": 3.0141464445860877, "kl": 0.0152130126953125, "learning_rate": 7.300508379196316e-07, "loss": -0.0206, "num_tokens": 94271738.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5501258373260498, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.3437435398410834, "rewards/wordcountpos_reward/raw_geo/std": 0.13881838684929934, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619462, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1127.0, "completions/max_terminated_length": 1127.0, "completions/mean_length": 979.875, "completions/mean_terminated_length": 979.875, "completions/min_length": 806.0, "completions/min_terminated_length": 806.0, "epoch": 0.43248649729945987, "frac_reward_zero_std": 0.0, "grad_norm": 3.4232582538616625, "kl": 0.015350341796875, "learning_rate": 7.297628138202627e-07, "loss": -0.0235, "num_tokens": 94309504.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0532057285308838, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11088840428033599, "rewards/wordcountpos_reward/raw_geo/std": 0.10400867586300046, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1365.625, "completions/mean_terminated_length": 1346.4285888671875, "completions/min_length": 1240.0, "completions/min_terminated_length": 1240.0, "epoch": 0.4326865373074615, "frac_reward_zero_std": 0.0, "grad_norm": 2.6237187949552663, "kl": 0.01348876953125, "learning_rate": 7.294747020677294e-07, "loss": 0.007, "num_tokens": 94345466.0, "reward": 3.3527612686157227e-08, "reward_std": 0.9992033243179321, "rewards/wordcountpos_reward/mean": 3.3527612686157227e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.021277846227802905, "rewards/wordcountpos_reward/raw_geo/std": 0.051802474488911905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1307.4375, "completions/mean_terminated_length": 1219.9091796875, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.4328865773154631, "frac_reward_zero_std": 0.0, "grad_norm": 3.1512301919868864, "kl": 0.01654052734375, "learning_rate": 7.291865028025164e-07, "loss": 0.0243, "num_tokens": 94391265.0, "reward": 0.0, "reward_std": 0.998887300491333, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.056446959206936126, "rewards/wordcountpos_reward/raw_geo/std": 0.052695120591033005, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1235.625, "completions/mean_terminated_length": 1197.857177734375, "completions/min_length": 974.0, "completions/min_terminated_length": 974.0, "epoch": 0.4330866173234647, "frac_reward_zero_std": 0.0, "grad_norm": 3.0616993800778896, "kl": 0.01861572265625, "learning_rate": 7.28898216165151e-07, "loss": 0.0026, "num_tokens": 94433883.0, "reward": 0.0, "reward_std": 0.9353396892547607, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12086712827470102, "rewards/wordcountpos_reward/raw_geo/std": 0.06463637179994738, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1060.125, "completions/mean_terminated_length": 1060.125, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.4332866573314663, "frac_reward_zero_std": 0.0, "grad_norm": 2.8465155731958474, "kl": 0.0215606689453125, "learning_rate": 7.28609842296203e-07, "loss": 0.006, "num_tokens": 94477269.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8375067114830017, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12310653766091213, "rewards/wordcountpos_reward/raw_geo/std": 0.08630632068393153, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1355373393953503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 940.0, "completions/max_terminated_length": 940.0, "completions/mean_length": 849.625, "completions/mean_terminated_length": 849.625, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.4334866973394679, "frac_reward_zero_std": 0.0, "grad_norm": 3.8650468812929444, "kl": 0.021240234375, "learning_rate": 7.283213813362848e-07, "loss": -0.0097, "num_tokens": 94506575.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0158004760742188, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1229962259260747, "rewards/wordcountpos_reward/raw_geo/std": 0.07235204757640029, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1302.4375, "completions/mean_terminated_length": 1212.6363525390625, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.4336867373474695, "frac_reward_zero_std": 0.0, "grad_norm": 2.754453661324602, "kl": 0.0128326416015625, "learning_rate": 7.280328334260515e-07, "loss": -0.0614, "num_tokens": 94561038.0, "reward": 0.0, "reward_std": 0.4559486508369446, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0912836716593825, "rewards/wordcountpos_reward/raw_geo/std": 0.09961223142180345, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1168.0, "completions/max_terminated_length": 1168.0, "completions/mean_length": 1073.8125, "completions/mean_terminated_length": 1073.8125, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.4338867773554711, "frac_reward_zero_std": 0.0, "grad_norm": 3.7523563266973676, "kl": 0.022064208984375, "learning_rate": 7.277441987062001e-07, "loss": 0.0046, "num_tokens": 94602395.0, "reward": -7.450580596923828e-09, "reward_std": 0.9619778990745544, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.2179713818309638, "rewards/wordcountpos_reward/raw_geo/std": 0.25892209793409177, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1115546702045434, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1202.3125, "completions/mean_terminated_length": 1182.4666748046875, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.4340868173634727, "frac_reward_zero_std": 0.0, "grad_norm": 3.078286965837673, "kl": 0.0155181884765625, "learning_rate": 7.274554773174703e-07, "loss": -0.0164, "num_tokens": 94644360.0, "reward": 0.0, "reward_std": 0.5164382457733154, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1683538557741164, "rewards/wordcountpos_reward/raw_geo/std": 0.16414532253020558, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1112.9375, "completions/mean_terminated_length": 1112.9375, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.4342868573714743, "frac_reward_zero_std": 0.0, "grad_norm": 3.100159989933138, "kl": 0.0150909423828125, "learning_rate": 7.271666694006438e-07, "loss": -0.0055, "num_tokens": 94685695.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6830623745918274, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14633050604990472, "rewards/wordcountpos_reward/raw_geo/std": 0.17195105903917196, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1267.25, "completions/mean_terminated_length": 1034.5, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.4344868973794759, "frac_reward_zero_std": 0.0, "grad_norm": 2.9487028810959073, "kl": 0.0161590576171875, "learning_rate": 7.268777750965452e-07, "loss": -0.0013, "num_tokens": 94721083.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7774075269699097, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03380343414391287, "rewards/wordcountpos_reward/raw_geo/std": 0.12169317386613882, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1189.5, "completions/mean_terminated_length": 1086.0, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.4346869373874775, "frac_reward_zero_std": 0.0, "grad_norm": 3.5800641226158763, "kl": 0.01702880859375, "learning_rate": 7.265887945460399e-07, "loss": 0.0001, "num_tokens": 94768475.0, "reward": 0.0, "reward_std": 0.8385995030403137, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.20344773775458275, "rewards/wordcountpos_reward/raw_geo/std": 0.3213041870244678, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1229.5625, "completions/mean_terminated_length": 1229.5625, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.4348869773954791, "frac_reward_zero_std": 0.0, "grad_norm": 2.927674810182712, "kl": 0.0170440673828125, "learning_rate": 7.262997278900366e-07, "loss": 0.0311, "num_tokens": 94811700.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0126248598098755, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03725512370232067, "rewards/wordcountpos_reward/raw_geo/std": 0.1074653947287971, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1366.1875, "completions/mean_terminated_length": 1321.5833740234375, "completions/min_length": 1094.0, "completions/min_terminated_length": 1094.0, "epoch": 0.4350870174034807, "frac_reward_zero_std": 0.0, "grad_norm": 2.7879932745598275, "kl": 0.016632080078125, "learning_rate": 7.260105752694854e-07, "loss": -0.0086, "num_tokens": 94868983.0, "reward": 0.0, "reward_std": 0.19125214219093323, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22312608466341838, "rewards/wordcountpos_reward/raw_geo/std": 0.23224474444384352, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1140.625, "completions/mean_terminated_length": 1140.625, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.43528705741148227, "frac_reward_zero_std": 0.0, "grad_norm": 2.3087379314480456, "kl": 0.010711669921875, "learning_rate": 7.257213368253785e-07, "loss": 0.0141, "num_tokens": 94912753.0, "reward": 0.0, "reward_std": 0.8760815262794495, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.051270198652258174, "rewards/wordcountpos_reward/raw_geo/std": 0.09456900048567589, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1043.3125, "completions/mean_terminated_length": 1043.3125, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.4354870974194839, "frac_reward_zero_std": 0.0, "grad_norm": 3.982356846168621, "kl": 0.021484375, "learning_rate": 7.254320126987498e-07, "loss": 0.0064, "num_tokens": 94957326.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9053691029548645, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2563116337775772, "rewards/wordcountpos_reward/raw_geo/std": 0.11631092726278018, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1231.9375, "completions/mean_terminated_length": 1071.0999755859375, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.4356871374274855, "frac_reward_zero_std": 0.0, "grad_norm": 3.047131319764822, "kl": 0.01641845703125, "learning_rate": 7.251426030306751e-07, "loss": -0.009, "num_tokens": 95009669.0, "reward": 0.0, "reward_std": 0.5613985061645508, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01181043062659547, "rewards/wordcountpos_reward/raw_geo/std": 0.09772820037557663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 962.6875, "completions/mean_terminated_length": 962.6875, "completions/min_length": 468.0, "completions/min_terminated_length": 468.0, "epoch": 0.4358871774354871, "frac_reward_zero_std": 0.0, "grad_norm": 3.139471091377754, "kl": 0.0175018310546875, "learning_rate": 7.24853107962272e-07, "loss": -0.0404, "num_tokens": 95047992.0, "reward": 2.9802322387695312e-08, "reward_std": 0.812251091003418, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.051587679261328824, "rewards/wordcountpos_reward/raw_geo/std": 0.048753275053594766, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1466.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1098.75, "completions/mean_terminated_length": 1098.75, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.4360872174434887, "frac_reward_zero_std": 0.0, "grad_norm": 3.335646471831164, "kl": 0.017852783203125, "learning_rate": 7.245635276346992e-07, "loss": -0.041, "num_tokens": 95090724.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6050213575363159, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015037779400087565, "rewards/wordcountpos_reward/raw_geo/std": 0.08297824963462298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1133.0, "completions/max_terminated_length": 1133.0, "completions/mean_length": 961.5, "completions/mean_terminated_length": 961.5, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.4362872574514903, "frac_reward_zero_std": 0.0, "grad_norm": 3.602670595455265, "kl": 0.0167083740234375, "learning_rate": 7.242738621891579e-07, "loss": -0.0347, "num_tokens": 95120340.0, "reward": 0.0, "reward_std": 0.9712807536125183, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.008778611168442462, "rewards/wordcountpos_reward/raw_geo/std": 0.13235950141951153, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1107.9375, "completions/mean_terminated_length": 1107.9375, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.4364872974594919, "frac_reward_zero_std": 0.0, "grad_norm": 3.7133566839803938, "kl": 0.018035888671875, "learning_rate": 7.239841117668898e-07, "loss": 0.0054, "num_tokens": 95166347.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8946967124938965, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04019702601084514, "rewards/wordcountpos_reward/raw_geo/std": 0.13194104891912953, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279466, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 949.25, "completions/mean_terminated_length": 949.25, "completions/min_length": 562.0, "completions/min_terminated_length": 562.0, "epoch": 0.4366873374674935, "frac_reward_zero_std": 0.0, "grad_norm": 3.76002718774887, "kl": 0.01922607421875, "learning_rate": 7.236942765091789e-07, "loss": 0.0515, "num_tokens": 95205079.0, "reward": -5.960464477539063e-08, "reward_std": 0.9554041624069214, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004551920851112163, "rewards/wordcountpos_reward/raw_geo/std": 0.11923137473876848, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1007.1875, "completions/mean_terminated_length": 1007.1875, "completions/min_length": 784.0, "completions/min_terminated_length": 784.0, "epoch": 0.4368873774754951, "frac_reward_zero_std": 0.0, "grad_norm": 3.9177911988265133, "kl": 0.021728515625, "learning_rate": 7.2340435655735e-07, "loss": -0.0075, "num_tokens": 95254002.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6637979745864868, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18524663591804139, "rewards/wordcountpos_reward/raw_geo/std": 0.2074731822889215, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563383, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1455.9375, "completions/mean_terminated_length": 1382.5, "completions/min_length": 1263.0, "completions/min_terminated_length": 1263.0, "epoch": 0.4370874174834967, "frac_reward_zero_std": 0.0, "grad_norm": 2.755026252551419, "kl": 0.00998687744140625, "learning_rate": 7.231143520527694e-07, "loss": -0.019, "num_tokens": 95305569.0, "reward": 0.0, "reward_std": 0.8876936435699463, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1520628717146717, "rewards/wordcountpos_reward/raw_geo/std": 0.23003025827356988, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039004, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1045.9375, "completions/mean_terminated_length": 1045.9375, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.4372874574914983, "frac_reward_zero_std": 0.0, "grad_norm": 3.6100410917592383, "kl": 0.018341064453125, "learning_rate": 7.228242631368448e-07, "loss": -0.0051, "num_tokens": 95333384.0, "reward": 0.0, "reward_std": 0.7713936567306519, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.024821234798994764, "rewards/wordcountpos_reward/raw_geo/std": 0.03394432283699125, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1204.0, "completions/mean_terminated_length": 1204.0, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.4374874974994999, "frac_reward_zero_std": 0.0, "grad_norm": 2.568259204782193, "kl": 0.01239013671875, "learning_rate": 7.225340899510246e-07, "loss": -0.0305, "num_tokens": 95377616.0, "reward": 0.0, "reward_std": 0.9810300469398499, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06480766330654224, "rewards/wordcountpos_reward/raw_geo/std": 0.13195010663745216, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1211.0, "completions/max_terminated_length": 1211.0, "completions/mean_length": 969.625, "completions/mean_terminated_length": 969.625, "completions/min_length": 657.0, "completions/min_terminated_length": 657.0, "epoch": 0.4376875375075015, "frac_reward_zero_std": 0.0, "grad_norm": 1.9836327957225053, "kl": 0.010650634765625, "learning_rate": 7.222438326367986e-07, "loss": 0.0035, "num_tokens": 95425242.0, "reward": -7.450580596923828e-09, "reward_std": 0.9457253813743591, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07921804180670373, "rewards/wordcountpos_reward/raw_geo/std": 0.0778366721011444, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1188.25, "completions/mean_terminated_length": 1143.71435546875, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.4378875775155031, "frac_reward_zero_std": 0.0, "grad_norm": 3.4119811456305618, "kl": 0.0170135498046875, "learning_rate": 7.219534913356978e-07, "loss": -0.034, "num_tokens": 95468086.0, "reward": 0.0, "reward_std": 0.47497260570526123, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05582350316275127, "rewards/wordcountpos_reward/raw_geo/std": 0.0660345248823554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 986.4375, "completions/mean_terminated_length": 986.4375, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.4380876175235047, "frac_reward_zero_std": 0.0, "grad_norm": 3.560764602978998, "kl": 0.015411376953125, "learning_rate": 7.216630661892938e-07, "loss": 0.0204, "num_tokens": 95512509.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7201507091522217, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.027808386460004983, "rewards/wordcountpos_reward/raw_geo/std": 0.19402291682626158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 1065.5625, "completions/mean_terminated_length": 1065.5625, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.4382876575315063, "frac_reward_zero_std": 0.0, "grad_norm": 2.793824563812458, "kl": 0.0132598876953125, "learning_rate": 7.213725573391991e-07, "loss": -0.0093, "num_tokens": 95550910.0, "reward": 0.0, "reward_std": 0.5339565277099609, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.078796308227523, "rewards/wordcountpos_reward/raw_geo/std": 0.10540918962586764, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1221.5, "completions/mean_terminated_length": 1128.666748046875, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.4384876975395079, "frac_reward_zero_std": 0.0, "grad_norm": 3.4829745875483713, "kl": 0.0188751220703125, "learning_rate": 7.210819649270674e-07, "loss": -0.0295, "num_tokens": 95590526.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5592880249023438, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16072342683301444, "rewards/wordcountpos_reward/raw_geo/std": 0.16550629830288088, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13214750456578045, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1043.5, "completions/mean_terminated_length": 1043.5, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.4386877375475095, "frac_reward_zero_std": 0.0, "grad_norm": 3.24246176696938, "kl": 0.013916015625, "learning_rate": 7.207912890945926e-07, "loss": -0.0011, "num_tokens": 95642246.0, "reward": 0.0, "reward_std": 0.4785735309123993, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06151522859339303, "rewards/wordcountpos_reward/raw_geo/std": 0.07401856744361068, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1181.0, "completions/max_terminated_length": 1181.0, "completions/mean_length": 862.875, "completions/mean_terminated_length": 862.875, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.4388877775555111, "frac_reward_zero_std": 0.0, "grad_norm": 3.589177030917447, "kl": 0.0162353515625, "learning_rate": 7.2050052998351e-07, "loss": -0.0085, "num_tokens": 95671828.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0034339427947998, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14559186612503813, "rewards/wordcountpos_reward/raw_geo/std": 0.07702964031564935, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1120.8125, "completions/mean_terminated_length": 1095.533447265625, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.43908781756351273, "frac_reward_zero_std": 0.0, "grad_norm": 3.2139167049329114, "kl": 0.0123443603515625, "learning_rate": 7.202096877355943e-07, "loss": -0.0457, "num_tokens": 95703633.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0333693027496338, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01531149023255283, "rewards/wordcountpos_reward/raw_geo/std": 0.07266843139061999, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1110.375, "completions/mean_terminated_length": 1110.375, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.4392878575715143, "frac_reward_zero_std": 0.0, "grad_norm": 3.145967855222406, "kl": 0.0160369873046875, "learning_rate": 7.199187624926622e-07, "loss": 0.006, "num_tokens": 95741031.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9825766086578369, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.044126997713355556, "rewards/wordcountpos_reward/raw_geo/std": 0.08054107941881317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1161.5, "completions/mean_terminated_length": 1161.5, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.4394878975795159, "frac_reward_zero_std": 0.0, "grad_norm": 2.4657842072187703, "kl": 0.0122528076171875, "learning_rate": 7.1962775439657e-07, "loss": 0.0018, "num_tokens": 95781583.0, "reward": 0.0, "reward_std": 0.7470434904098511, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0023771376809355116, "rewards/wordcountpos_reward/raw_geo/std": 0.07968452301047917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1239.8125, "completions/mean_terminated_length": 1222.4666748046875, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.4396879375875175, "frac_reward_zero_std": 0.0, "grad_norm": 3.2181351096343707, "kl": 0.0185546875, "learning_rate": 7.193366635892142e-07, "loss": -0.0101, "num_tokens": 95825868.0, "reward": 3.725290298461914e-08, "reward_std": 1.0384200811386108, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06744514920504649, "rewards/wordcountpos_reward/raw_geo/std": 0.09400009644797136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1133.5, "completions/mean_terminated_length": 1133.5, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.4398879775955191, "frac_reward_zero_std": 0.0, "grad_norm": 3.579810937545375, "kl": 0.01629638671875, "learning_rate": 7.190454902125326e-07, "loss": -0.0434, "num_tokens": 95865764.0, "reward": 1.862645149230957e-08, "reward_std": 0.9459765553474426, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02205994562775993, "rewards/wordcountpos_reward/raw_geo/std": 0.16449735493959347, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1218.9375, "completions/mean_terminated_length": 1178.7857666015625, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.44008801760352073, "frac_reward_zero_std": 0.0, "grad_norm": 3.0320709915862825, "kl": 0.0161285400390625, "learning_rate": 7.187542344085022e-07, "loss": 0.0198, "num_tokens": 95904675.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0073434114456177, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09014684843570145, "rewards/wordcountpos_reward/raw_geo/std": 0.2636464853890584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1151.4375, "completions/mean_terminated_length": 1151.4375, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.4402880576115223, "frac_reward_zero_std": 0.0, "grad_norm": 3.314559332106158, "kl": 0.01922607421875, "learning_rate": 7.184628963191408e-07, "loss": 0.0177, "num_tokens": 95952786.0, "reward": 4.470348358154297e-08, "reward_std": 0.9171484708786011, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027060200388384484, "rewards/wordcountpos_reward/raw_geo/std": 0.05471077398964935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1093.3125, "completions/mean_terminated_length": 1035.21435546875, "completions/min_length": 721.0, "completions/min_terminated_length": 721.0, "epoch": 0.4404880976195239, "frac_reward_zero_std": 0.0, "grad_norm": 3.145085765812956, "kl": 0.01910400390625, "learning_rate": 7.181714760865061e-07, "loss": 0.0033, "num_tokens": 96000447.0, "reward": 0.0, "reward_std": 0.9233815670013428, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1418198385858184, "rewards/wordcountpos_reward/raw_geo/std": 0.08682736991205883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1222.1875, "completions/mean_terminated_length": 1182.5, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.4406881376275255, "frac_reward_zero_std": 0.0, "grad_norm": 3.408541009155671, "kl": 0.01934814453125, "learning_rate": 7.178799738526962e-07, "loss": -0.0282, "num_tokens": 96050146.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0111584663391113, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0645054632364182, "rewards/wordcountpos_reward/raw_geo/std": 0.07248656912342201, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639732, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 947.0, "completions/max_terminated_length": 947.0, "completions/mean_length": 778.9375, "completions/mean_terminated_length": 778.9375, "completions/min_length": 524.0, "completions/min_terminated_length": 524.0, "epoch": 0.4408881776355271, "frac_reward_zero_std": 0.0, "grad_norm": 3.0591312245149886, "kl": 0.014862060546875, "learning_rate": 7.175883897598486e-07, "loss": 0.0577, "num_tokens": 96083249.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0240609645843506, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11378007251047825, "rewards/wordcountpos_reward/raw_geo/std": 0.15684237348943367, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0958393717904348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1161.9375, "completions/mean_terminated_length": 1113.6429443359375, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.4410882176435287, "frac_reward_zero_std": 0.0, "grad_norm": 3.1688451915807807, "kl": 0.01837158203125, "learning_rate": 7.172967239501413e-07, "loss": -0.0327, "num_tokens": 96120448.0, "reward": 0.0, "reward_std": 0.4241097569465637, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.024944764702795427, "rewards/wordcountpos_reward/raw_geo/std": 0.2037324427300553, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1220.25, "completions/mean_terminated_length": 1155.6923828125, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.4412882576515303, "frac_reward_zero_std": 0.0, "grad_norm": 3.165827726347954, "kl": 0.023162841796875, "learning_rate": 7.170049765657915e-07, "loss": -0.0223, "num_tokens": 96172116.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7883094549179077, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01791336720377388, "rewards/wordcountpos_reward/raw_geo/std": 0.10376911742012511, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1185.1875, "completions/mean_terminated_length": 1164.2000732421875, "completions/min_length": 779.0, "completions/min_terminated_length": 779.0, "epoch": 0.4414882976595319, "frac_reward_zero_std": 0.0, "grad_norm": 2.7888773796338366, "kl": 0.01404571533203125, "learning_rate": 7.167131477490569e-07, "loss": 0.0533, "num_tokens": 96216767.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8836146593093872, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2690075319495745, "rewards/wordcountpos_reward/raw_geo/std": 0.2290496629386573, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1484.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1217.25, "completions/mean_terminated_length": 1217.25, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.4416883376675335, "frac_reward_zero_std": 0.0, "grad_norm": 3.389382308911604, "kl": 0.018218994140625, "learning_rate": 7.164212376422345e-07, "loss": -0.0227, "num_tokens": 96262515.0, "reward": 0.0, "reward_std": 0.7135185599327087, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12114607565300742, "rewards/wordcountpos_reward/raw_geo/std": 0.1901841143352888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195009, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 1138.25, "completions/mean_terminated_length": 1114.1334228515625, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.4418883776755351, "frac_reward_zero_std": 0.0, "grad_norm": 3.1713677970079206, "kl": 0.0191497802734375, "learning_rate": 7.161292463876608e-07, "loss": 0.0036, "num_tokens": 96306375.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8223682641983032, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18429507074569437, "rewards/wordcountpos_reward/raw_geo/std": 0.08708009918035627, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1251.25, "completions/mean_terminated_length": 1234.666748046875, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.4420884176835367, "frac_reward_zero_std": 0.0, "grad_norm": 3.048220720354668, "kl": 0.0168914794921875, "learning_rate": 7.158371741277123e-07, "loss": 0.0186, "num_tokens": 96352355.0, "reward": -2.2351741790771484e-08, "reward_std": 0.935321569442749, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1396978274016175, "rewards/wordcountpos_reward/raw_geo/std": 0.07355563542415652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.1046156988431681, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1184.8125, "completions/mean_terminated_length": 1079.75, "completions/min_length": 535.0, "completions/min_terminated_length": 535.0, "epoch": 0.4422884576915383, "frac_reward_zero_std": 0.0, "grad_norm": 2.7309666301453537, "kl": 0.0123138427734375, "learning_rate": 7.155450210048045e-07, "loss": 0.0154, "num_tokens": 96386992.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0465312004089355, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013928325037602912, "rewards/wordcountpos_reward/raw_geo/std": 0.055983938267309186, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116195, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1061.0, "completions/mean_terminated_length": 1061.0, "completions/min_length": 709.0, "completions/min_terminated_length": 709.0, "epoch": 0.4424884976995399, "frac_reward_zero_std": 0.0, "grad_norm": 3.945813042435558, "kl": 0.022979736328125, "learning_rate": 7.152527871613929e-07, "loss": 0.0011, "num_tokens": 96433112.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9907811880111694, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09876351204357181, "rewards/wordcountpos_reward/raw_geo/std": 0.11838415474647018, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 900.5, "completions/mean_terminated_length": 860.5333862304688, "completions/min_length": 513.0, "completions/min_terminated_length": 513.0, "epoch": 0.4426885377075415, "frac_reward_zero_std": 0.0, "grad_norm": 3.773967184055837, "kl": 0.018951416015625, "learning_rate": 7.149604727399717e-07, "loss": -0.0463, "num_tokens": 96473928.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6256674528121948, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.018700171918932994, "rewards/wordcountpos_reward/raw_geo/std": 0.12083949031442717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1245.0, "completions/max_terminated_length": 1245.0, "completions/mean_length": 1075.5, "completions/mean_terminated_length": 1075.5, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.44288857771554313, "frac_reward_zero_std": 0.0, "grad_norm": 3.3182073681543143, "kl": 0.0194091796875, "learning_rate": 7.14668077883075e-07, "loss": -0.0102, "num_tokens": 96521536.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0150034427642822, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.045405509359498095, "rewards/wordcountpos_reward/raw_geo/std": 0.1489999407525768, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1183.6875, "completions/mean_terminated_length": 1162.60009765625, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.4430886177235447, "frac_reward_zero_std": 0.0, "grad_norm": 3.1656902430324365, "kl": 0.01995849609375, "learning_rate": 7.143756027332759e-07, "loss": 0.023, "num_tokens": 96559155.0, "reward": 0.0, "reward_std": 0.6569689512252808, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10808590010315043, "rewards/wordcountpos_reward/raw_geo/std": 0.15919322919828335, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1276.9375, "completions/mean_terminated_length": 1103.4444580078125, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.4432886577315463, "frac_reward_zero_std": 0.0, "grad_norm": 3.2913950054284373, "kl": 0.0158843994140625, "learning_rate": 7.140830474331864e-07, "loss": 0.0006, "num_tokens": 96596938.0, "reward": 0.0, "reward_std": 1.0528181791305542, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.040905737530747985, "rewards/wordcountpos_reward/raw_geo/std": 0.1259060345118351, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 945.4375, "completions/mean_terminated_length": 945.4375, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.4434886977395479, "frac_reward_zero_std": 0.0, "grad_norm": 3.662041990193343, "kl": 0.01678466796875, "learning_rate": 7.137904121254578e-07, "loss": 0.0236, "num_tokens": 96633433.0, "reward": -1.4901161193847656e-08, "reward_std": 0.904813289642334, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1259660150945874, "rewards/wordcountpos_reward/raw_geo/std": 0.031194606539524876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12641788434189793, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1234.875, "completions/mean_terminated_length": 1197.0, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.4436887377475495, "frac_reward_zero_std": 0.0, "grad_norm": 3.195794716357442, "kl": 0.02496337890625, "learning_rate": 7.134976969527806e-07, "loss": -0.1085, "num_tokens": 96682911.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9966319799423218, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16650654605828116, "rewards/wordcountpos_reward/raw_geo/std": 0.16508956672284406, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1072.3125, "completions/mean_terminated_length": 1072.3125, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.44388877775555113, "frac_reward_zero_std": 0.0, "grad_norm": 3.495473241340042, "kl": 0.018463134765625, "learning_rate": 7.132049020578839e-07, "loss": -0.0482, "num_tokens": 96713364.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0372358560562134, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00043618055857061726, "rewards/wordcountpos_reward/raw_geo/std": 0.023043628976973648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1193.875, "completions/mean_terminated_length": 1193.875, "completions/min_length": 1033.0, "completions/min_terminated_length": 1033.0, "epoch": 0.4440888177635527, "frac_reward_zero_std": 0.0, "grad_norm": 2.7545416663362903, "kl": 0.015625, "learning_rate": 7.129120275835358e-07, "loss": 0.0049, "num_tokens": 96760410.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0284537076950073, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02340763698714131, "rewards/wordcountpos_reward/raw_geo/std": 0.05854977103336484, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 987.5625, "completions/mean_terminated_length": 987.5625, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.4442888577715543, "frac_reward_zero_std": 0.0, "grad_norm": 3.215480555151194, "kl": 0.0158843994140625, "learning_rate": 7.126190736725434e-07, "loss": -0.0484, "num_tokens": 96801283.0, "reward": -2.9802322387695312e-08, "reward_std": 0.843186616897583, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02821795913104235, "rewards/wordcountpos_reward/raw_geo/std": 0.03754160950862395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1342.5, "completions/mean_terminated_length": 1270.9091796875, "completions/min_length": 1081.0, "completions/min_terminated_length": 1081.0, "epoch": 0.4444888977795559, "frac_reward_zero_std": 0.0, "grad_norm": 2.1768242703268235, "kl": 0.00853729248046875, "learning_rate": 7.123260404677522e-07, "loss": -0.0255, "num_tokens": 96853195.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9069143533706665, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04643371535305653, "rewards/wordcountpos_reward/raw_geo/std": 0.1304130506597021, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1268.125, "completions/mean_terminated_length": 1268.125, "completions/min_length": 1097.0, "completions/min_terminated_length": 1097.0, "epoch": 0.4446889377875575, "frac_reward_zero_std": 0.0, "grad_norm": 1.4689929134514876, "kl": 0.006954193115234375, "learning_rate": 7.120329281120464e-07, "loss": -0.0112, "num_tokens": 96899149.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8822857141494751, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.27037198350716096, "rewards/wordcountpos_reward/raw_geo/std": 0.1311982507094262, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1193.25, "completions/mean_terminated_length": 1172.800048828125, "completions/min_length": 797.0, "completions/min_terminated_length": 797.0, "epoch": 0.44488897779555914, "frac_reward_zero_std": 0.0, "grad_norm": 3.3744694657465777, "kl": 0.019622802734375, "learning_rate": 7.117397367483493e-07, "loss": -0.0188, "num_tokens": 96943929.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9198059439659119, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05602008556588594, "rewards/wordcountpos_reward/raw_geo/std": 0.03387028710485849, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1135.5625, "completions/mean_terminated_length": 1135.5625, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.4450890178035607, "frac_reward_zero_std": 0.0, "grad_norm": 3.3065189663120877, "kl": 0.018798828125, "learning_rate": 7.114464665196221e-07, "loss": -0.003, "num_tokens": 96980930.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8761552572250366, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05590474427605844, "rewards/wordcountpos_reward/raw_geo/std": 0.12616166795032205, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1185.0, "completions/max_terminated_length": 1185.0, "completions/mean_length": 1046.625, "completions/mean_terminated_length": 1046.625, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.4452890578115623, "frac_reward_zero_std": 0.0, "grad_norm": 3.4446704035486264, "kl": 0.0216217041015625, "learning_rate": 7.111531175688646e-07, "loss": -0.0184, "num_tokens": 97018548.0, "reward": 0.0, "reward_std": 0.6106638312339783, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.004521519168357614, "rewards/wordcountpos_reward/raw_geo/std": 0.249398861410553, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1182.9375, "completions/mean_terminated_length": 1161.800048828125, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.4454890978195639, "frac_reward_zero_std": 0.0, "grad_norm": 3.509601044458924, "kl": 0.02020263671875, "learning_rate": 7.108596900391152e-07, "loss": 0.0038, "num_tokens": 97062795.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9497358202934265, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10738801266992776, "rewards/wordcountpos_reward/raw_geo/std": 0.08056578508321749, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 915.9375, "completions/mean_terminated_length": 915.9375, "completions/min_length": 563.0, "completions/min_terminated_length": 563.0, "epoch": 0.44568913782756553, "frac_reward_zero_std": 0.0, "grad_norm": 3.4776949951103493, "kl": 0.01995849609375, "learning_rate": 7.105661840734506e-07, "loss": -0.0072, "num_tokens": 97095250.0, "reward": -4.470348358154297e-08, "reward_std": 0.940719723701477, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13691050383869924, "rewards/wordcountpos_reward/raw_geo/std": 0.1716463025309404, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1465.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1151.5625, "completions/mean_terminated_length": 1151.5625, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.44588917783556714, "frac_reward_zero_std": 0.0, "grad_norm": 2.841705290866407, "kl": 0.0138092041015625, "learning_rate": 7.102725998149855e-07, "loss": -0.014, "num_tokens": 97142203.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9745978116989136, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.42044298103092975, "rewards/wordcountpos_reward/raw_geo/std": 0.17189857356998625, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13871099718746435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 973.25, "completions/mean_terminated_length": 973.25, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.4460892178435687, "frac_reward_zero_std": 0.0, "grad_norm": 3.783707736043377, "kl": 0.020782470703125, "learning_rate": 7.099789374068728e-07, "loss": -0.0519, "num_tokens": 97182935.0, "reward": -7.450580596923828e-09, "reward_std": 1.0506384372711182, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03444739734505659, "rewards/wordcountpos_reward/raw_geo/std": 0.03926164935849679, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1134.3125, "completions/mean_terminated_length": 1109.933349609375, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.4462892578515703, "frac_reward_zero_std": 0.0, "grad_norm": 3.311136052019833, "kl": 0.01983642578125, "learning_rate": 7.096851969923039e-07, "loss": -0.0127, "num_tokens": 97218428.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8281007409095764, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03657591315554037, "rewards/wordcountpos_reward/raw_geo/std": 0.08484051589194885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1243.1875, "completions/mean_terminated_length": 1206.5, "completions/min_length": 1023.0, "completions/min_terminated_length": 1023.0, "epoch": 0.4464892978595719, "frac_reward_zero_std": 0.0, "grad_norm": 2.3317542044705566, "kl": 0.0102081298828125, "learning_rate": 7.093913787145079e-07, "loss": -0.0231, "num_tokens": 97265167.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9937655925750732, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01864784677321386, "rewards/wordcountpos_reward/raw_geo/std": 0.0414845265925772, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1169.0, "completions/mean_terminated_length": 1121.71435546875, "completions/min_length": 717.0, "completions/min_terminated_length": 717.0, "epoch": 0.44668933786757353, "frac_reward_zero_std": 0.0, "grad_norm": 3.163346290038291, "kl": 0.01751708984375, "learning_rate": 7.090974827167516e-07, "loss": -0.0136, "num_tokens": 97308231.0, "reward": 0.0, "reward_std": 0.4874013066291809, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03198359389678342, "rewards/wordcountpos_reward/raw_geo/std": 0.2379438513609349, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1086.4375, "completions/mean_terminated_length": 1086.4375, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.4468893778755751, "frac_reward_zero_std": 0.0, "grad_norm": 2.5323480012718287, "kl": 0.0096893310546875, "learning_rate": 7.088035091423404e-07, "loss": -0.0008, "num_tokens": 97343334.0, "reward": 0.0, "reward_std": 0.9922982454299927, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0274414747853519, "rewards/wordcountpos_reward/raw_geo/std": 0.11506076228607717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1181.1875, "completions/mean_terminated_length": 1159.933349609375, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.4470894178835767, "frac_reward_zero_std": 0.0, "grad_norm": 3.3618663667617845, "kl": 0.020965576171875, "learning_rate": 7.085094581346172e-07, "loss": -0.0331, "num_tokens": 97394561.0, "reward": 0.0, "reward_std": 1.02137291431427, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04554271795494661, "rewards/wordcountpos_reward/raw_geo/std": 0.07456755782942112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1105.8125, "completions/mean_terminated_length": 1105.8125, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.4472894578915783, "frac_reward_zero_std": 0.0, "grad_norm": 3.1574532411696534, "kl": 0.0144195556640625, "learning_rate": 7.082153298369622e-07, "loss": 0.019, "num_tokens": 97444982.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9575008153915405, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.021212986119484224, "rewards/wordcountpos_reward/raw_geo/std": 0.1569531672735837, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1129.3125, "completions/mean_terminated_length": 1076.357177734375, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.4474894978995799, "frac_reward_zero_std": 0.0, "grad_norm": 3.491977495826679, "kl": 0.0200958251953125, "learning_rate": 7.079211243927943e-07, "loss": 0.0178, "num_tokens": 97487531.0, "reward": 0.0, "reward_std": 0.9241948127746582, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1931395359977978, "rewards/wordcountpos_reward/raw_geo/std": 0.20011190344954377, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1191.1875, "completions/mean_terminated_length": 1191.1875, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.44768953790758154, "frac_reward_zero_std": 0.0, "grad_norm": 3.4303084499196856, "kl": 0.021087646484375, "learning_rate": 7.076268419455686e-07, "loss": -0.0404, "num_tokens": 97533030.0, "reward": 0.0, "reward_std": 0.8846356868743896, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06419524381006762, "rewards/wordcountpos_reward/raw_geo/std": 0.050266444086026124, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 1029.4375, "completions/mean_terminated_length": 1029.4375, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.4478895779155831, "frac_reward_zero_std": 0.0, "grad_norm": 3.6609380985769335, "kl": 0.0165252685546875, "learning_rate": 7.073324826387792e-07, "loss": 0.0096, "num_tokens": 97577413.0, "reward": -7.450580596923828e-09, "reward_std": 1.0064533948898315, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03803669514048851, "rewards/wordcountpos_reward/raw_geo/std": 0.10266454575302006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1238.1875, "completions/mean_terminated_length": 1220.7333984375, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.4480896179235847, "frac_reward_zero_std": 0.0, "grad_norm": 2.8400263501639977, "kl": 0.0162811279296875, "learning_rate": 7.070380466159569e-07, "loss": -0.0157, "num_tokens": 97633008.0, "reward": 0.0, "reward_std": 1.0223641395568848, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11457898493242791, "rewards/wordcountpos_reward/raw_geo/std": 0.0685604721956031, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1115.3125, "completions/mean_terminated_length": 1115.3125, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.4482896579315863, "frac_reward_zero_std": 0.0, "grad_norm": 3.2037136112298303, "kl": 0.0201873779296875, "learning_rate": 7.067435340206699e-07, "loss": -0.0637, "num_tokens": 97678373.0, "reward": 0.0, "reward_std": 0.7213500738143921, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1361349749417496, "rewards/wordcountpos_reward/raw_geo/std": 0.31915712202103474, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1300.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1069.5625, "completions/mean_terminated_length": 1069.5625, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.44848969793958793, "frac_reward_zero_std": 0.0, "grad_norm": 2.640484578558775, "kl": 0.011505126953125, "learning_rate": 7.06448944996524e-07, "loss": -0.0137, "num_tokens": 97720734.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6548865437507629, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12204430495349884, "rewards/wordcountpos_reward/raw_geo/std": 0.13929178635158218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1188.75, "completions/mean_terminated_length": 1168.0, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.44868973794758954, "frac_reward_zero_std": 0.0, "grad_norm": 3.054045341181816, "kl": 0.017486572265625, "learning_rate": 7.06154279687162e-07, "loss": -0.0054, "num_tokens": 97755378.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0456587076187134, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.002307817723128442, "rewards/wordcountpos_reward/raw_geo/std": 0.10103581095731752, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1181.0625, "completions/mean_terminated_length": 1074.75, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.4488897779555911, "frac_reward_zero_std": 0.0, "grad_norm": 2.493234250045899, "kl": 0.019256591796875, "learning_rate": 7.058595382362641e-07, "loss": 0.0178, "num_tokens": 97797259.0, "reward": 0.0, "reward_std": 0.6542484760284424, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17216440515497744, "rewards/wordcountpos_reward/raw_geo/std": 0.12352115051525836, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1044.0625, "completions/mean_terminated_length": 1044.0625, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.4490898179635927, "frac_reward_zero_std": 0.0, "grad_norm": 3.7710189395183233, "kl": 0.0228271484375, "learning_rate": 7.055647207875478e-07, "loss": -0.0154, "num_tokens": 97828700.0, "reward": 0.0, "reward_std": 0.5036399960517883, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05812750687892506, "rewards/wordcountpos_reward/raw_geo/std": 0.05643147991383697, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869924, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1154.25, "completions/mean_terminated_length": 1104.857177734375, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.4492898579715943, "frac_reward_zero_std": 0.0, "grad_norm": 2.89434952287289, "kl": 0.01239013671875, "learning_rate": 7.052698274847671e-07, "loss": 0.0198, "num_tokens": 97869808.0, "reward": 0.0, "reward_std": 0.2848633825778961, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.141895359106773, "rewards/wordcountpos_reward/raw_geo/std": 0.35904288332475826, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1294.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 1055.0, "completions/mean_terminated_length": 1055.0, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.44948989797959593, "frac_reward_zero_std": 0.0, "grad_norm": 2.517384608499205, "kl": 0.01544189453125, "learning_rate": 7.049748584717135e-07, "loss": -0.0345, "num_tokens": 97909632.0, "reward": -2.9802322387695312e-08, "reward_std": 1.060815691947937, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.027777369804323267, "rewards/wordcountpos_reward/raw_geo/std": 0.09918431919698252, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13871099718746435, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1185.0, "completions/max_terminated_length": 1185.0, "completions/mean_length": 972.25, "completions/mean_terminated_length": 972.25, "completions/min_length": 717.0, "completions/min_terminated_length": 717.0, "epoch": 0.44968993798759754, "frac_reward_zero_std": 0.0, "grad_norm": 3.135013411807535, "kl": 0.014312744140625, "learning_rate": 7.046798138922151e-07, "loss": 0.0006, "num_tokens": 97950948.0, "reward": 0.0, "reward_std": 1.0158708095550537, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06492574495324069, "rewards/wordcountpos_reward/raw_geo/std": 0.09040311023393632, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921946, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1163.6875, "completions/mean_terminated_length": 1141.2667236328125, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.4498899779955991, "frac_reward_zero_std": 0.0, "grad_norm": 3.4845362892047556, "kl": 0.018096923828125, "learning_rate": 7.043846938901371e-07, "loss": 0.0041, "num_tokens": 97996039.0, "reward": 0.0, "reward_std": 1.0066540241241455, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.021610342745857124, "rewards/wordcountpos_reward/raw_geo/std": 0.21452286186354605, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0596284793999944, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1125.9375, "completions/mean_terminated_length": 1001.25, "completions/min_length": 573.0, "completions/min_terminated_length": 573.0, "epoch": 0.4500900180036007, "frac_reward_zero_std": 0.0, "grad_norm": 3.2011462644008044, "kl": 0.013214111328125, "learning_rate": 7.040894986093814e-07, "loss": 0.0135, "num_tokens": 98039486.0, "reward": 0.0, "reward_std": 0.6272410154342651, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01244603918052705, "rewards/wordcountpos_reward/raw_geo/std": 0.09891842066750114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1281.875, "completions/mean_terminated_length": 1267.3333740234375, "completions/min_length": 1094.0, "completions/min_terminated_length": 1094.0, "epoch": 0.4502900580116023, "frac_reward_zero_std": 0.0, "grad_norm": 2.946441500040091, "kl": 0.01348114013671875, "learning_rate": 7.037942281938864e-07, "loss": -0.0121, "num_tokens": 98091428.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8785233497619629, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15692848298175688, "rewards/wordcountpos_reward/raw_geo/std": 0.1475315221976866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1125.5625, "completions/mean_terminated_length": 1125.5625, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.45049009801960394, "frac_reward_zero_std": 0.0, "grad_norm": 3.3492480696674067, "kl": 0.017425537109375, "learning_rate": 7.034988827876275e-07, "loss": -0.0005, "num_tokens": 98141205.0, "reward": -5.960464477539063e-08, "reward_std": 0.6273061633110046, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19714190179250607, "rewards/wordcountpos_reward/raw_geo/std": 0.1646422885659349, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1168.5625, "completions/mean_terminated_length": 1168.5625, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.45069013802760555, "frac_reward_zero_std": 0.0, "grad_norm": 3.585125232498546, "kl": 0.019989013671875, "learning_rate": 7.032034625346162e-07, "loss": 0.028, "num_tokens": 98195710.0, "reward": 0.0, "reward_std": 0.6260074377059937, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05960194036862136, "rewards/wordcountpos_reward/raw_geo/std": 0.1748873594004431, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1262.5625, "completions/mean_terminated_length": 1228.6429443359375, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.4508901780356071, "frac_reward_zero_std": 0.0, "grad_norm": 3.405865927808821, "kl": 0.014892578125, "learning_rate": 7.029079675789009e-07, "loss": -0.0065, "num_tokens": 98252543.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6293748617172241, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07322259908494716, "rewards/wordcountpos_reward/raw_geo/std": 0.08332624133405514, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1301.1875, "completions/mean_terminated_length": 1272.7857666015625, "completions/min_length": 1092.0, "completions/min_terminated_length": 1092.0, "epoch": 0.4510902180436087, "frac_reward_zero_std": 0.0, "grad_norm": 3.020512755340875, "kl": 0.017822265625, "learning_rate": 7.026123980645663e-07, "loss": 0.0046, "num_tokens": 98288570.0, "reward": -7.450580596923828e-09, "reward_std": 1.067931890487671, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.044804125163684844, "rewards/wordcountpos_reward/raw_geo/std": 0.05702464048129632, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1167.1875, "completions/mean_terminated_length": 1090.3846435546875, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.4512902580516103, "frac_reward_zero_std": 0.0, "grad_norm": 3.3523663564973454, "kl": 0.018218994140625, "learning_rate": 7.023167541357334e-07, "loss": -0.0351, "num_tokens": 98338453.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8631026744842529, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05263152828477448, "rewards/wordcountpos_reward/raw_geo/std": 0.08754282184282342, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1270.4375, "completions/mean_terminated_length": 1237.6429443359375, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.45149029805961194, "frac_reward_zero_std": 0.0, "grad_norm": 2.842012822082789, "kl": 0.0135498046875, "learning_rate": 7.020210359365594e-07, "loss": 0.0021, "num_tokens": 98387564.0, "reward": -5.960464477539063e-08, "reward_std": 0.31741634011268616, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04701860886320323, "rewards/wordcountpos_reward/raw_geo/std": 0.12282679235342663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14981470036162822, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1195.4375, "completions/mean_terminated_length": 1175.1334228515625, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.45169033806761355, "frac_reward_zero_std": 0.0, "grad_norm": 3.369427156129481, "kl": 0.0170745849609375, "learning_rate": 7.017252436112381e-07, "loss": -0.0279, "num_tokens": 98437491.0, "reward": 0.0, "reward_std": 0.8811448812484741, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15257456616628895, "rewards/wordcountpos_reward/raw_geo/std": 0.3246898951316543, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10610965676722953, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1150.0, "completions/max_terminated_length": 1150.0, "completions/mean_length": 955.75, "completions/mean_terminated_length": 955.75, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.4518903780756151, "frac_reward_zero_std": 0.0, "grad_norm": 3.501402751913394, "kl": 0.0176849365234375, "learning_rate": 7.014293773039991e-07, "loss": -0.0248, "num_tokens": 98474615.0, "reward": 0.0, "reward_std": 0.4130035936832428, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.024812066766774325, "rewards/wordcountpos_reward/raw_geo/std": 0.18482131325534865, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 977.8125, "completions/mean_terminated_length": 977.8125, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.4520904180836167, "frac_reward_zero_std": 0.0, "grad_norm": 2.7394064435417262, "kl": 0.01049041748046875, "learning_rate": 7.01133437159108e-07, "loss": -0.0137, "num_tokens": 98511068.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9946562051773071, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12680113451731956, "rewards/wordcountpos_reward/raw_geo/std": 0.16777503912345923, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1354690069789096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1132.0625, "completions/mean_terminated_length": 1132.0625, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.45229045809161833, "frac_reward_zero_std": 0.0, "grad_norm": 3.0436605819924627, "kl": 0.019775390625, "learning_rate": 7.008374233208667e-07, "loss": -0.0155, "num_tokens": 98551557.0, "reward": 0.0, "reward_std": 0.8640792965888977, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05463905442681298, "rewards/wordcountpos_reward/raw_geo/std": 0.10734213986166895, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346313, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1154.0, "completions/mean_terminated_length": 1154.0, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.45249049809961994, "frac_reward_zero_std": 0.0, "grad_norm": 3.4193230315364307, "kl": 0.021087646484375, "learning_rate": 7.005413359336128e-07, "loss": -0.0417, "num_tokens": 98593005.0, "reward": -1.4901161193847656e-08, "reward_std": 0.985994815826416, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3419777038715314, "rewards/wordcountpos_reward/raw_geo/std": 0.055782662892215035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.16487930490266262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1242.9375, "completions/mean_terminated_length": 1183.615478515625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.4526905381076215, "frac_reward_zero_std": 0.0, "grad_norm": 3.3340924097938753, "kl": 0.020751953125, "learning_rate": 7.0024517514172e-07, "loss": 0.0064, "num_tokens": 98642964.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9687765836715698, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07274736241563069, "rewards/wordcountpos_reward/raw_geo/std": 0.04596380859620599, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1171.0625, "completions/mean_terminated_length": 1149.1334228515625, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.4528905781156231, "frac_reward_zero_std": 0.0, "grad_norm": 3.088571704639172, "kl": 0.0198974609375, "learning_rate": 6.999489410895976e-07, "loss": -0.0297, "num_tokens": 98688205.0, "reward": 5.587935447692871e-09, "reward_std": 1.015572190284729, "rewards/wordcountpos_reward/mean": 5.587935447692871e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.13396939385009712, "rewards/wordcountpos_reward/raw_geo/std": 0.10627032768384845, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1196.875, "completions/mean_terminated_length": 1176.666748046875, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.4530906181236247, "frac_reward_zero_std": 0.0, "grad_norm": 3.009327100695344, "kl": 0.0170745849609375, "learning_rate": 6.996526339216904e-07, "loss": -0.0424, "num_tokens": 98721011.0, "reward": 0.0, "reward_std": 0.7664017677307129, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09297314774789246, "rewards/wordcountpos_reward/raw_geo/std": 0.08211024639051949, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1094.3125, "completions/mean_terminated_length": 1094.3125, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.45329065813162633, "frac_reward_zero_std": 0.0, "grad_norm": 3.3961688420061247, "kl": 0.0174713134765625, "learning_rate": 6.993562537824795e-07, "loss": -0.0144, "num_tokens": 98771872.0, "reward": 0.0, "reward_std": 0.8355580568313599, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04232777118646325, "rewards/wordcountpos_reward/raw_geo/std": 0.18540059890668142, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.17121569675358278, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1210.25, "completions/mean_terminated_length": 1190.933349609375, "completions/min_length": 1039.0, "completions/min_terminated_length": 1039.0, "epoch": 0.45349069813962795, "frac_reward_zero_std": 0.0, "grad_norm": 2.6375693258751665, "kl": 0.0121612548828125, "learning_rate": 6.990598008164809e-07, "loss": -0.009, "num_tokens": 98818692.0, "reward": 0.0, "reward_std": 1.0490615367889404, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09500393288549824, "rewards/wordcountpos_reward/raw_geo/std": 0.1634297653392594, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1051.8125, "completions/mean_terminated_length": 1051.8125, "completions/min_length": 794.0, "completions/min_terminated_length": 794.0, "epoch": 0.4536907381476295, "frac_reward_zero_std": 0.0, "grad_norm": 3.2161417683193934, "kl": 0.0164337158203125, "learning_rate": 6.987632751682467e-07, "loss": 0.052, "num_tokens": 98852905.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5830873847007751, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18842404286433143, "rewards/wordcountpos_reward/raw_geo/std": 0.11903816172794572, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1115.5, "completions/mean_terminated_length": 1026.769287109375, "completions/min_length": 707.0, "completions/min_terminated_length": 707.0, "epoch": 0.4538907781556311, "frac_reward_zero_std": 0.0, "grad_norm": 3.287967052252599, "kl": 0.0171661376953125, "learning_rate": 6.984666769823639e-07, "loss": 0.011, "num_tokens": 98893857.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9781869649887085, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12469396651203224, "rewards/wordcountpos_reward/raw_geo/std": 0.11385446497987942, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1098.3125, "completions/mean_terminated_length": 1098.3125, "completions/min_length": 783.0, "completions/min_terminated_length": 783.0, "epoch": 0.4540908181636327, "frac_reward_zero_std": 0.0, "grad_norm": 3.088130854222735, "kl": 0.011993408203125, "learning_rate": 6.981700064034553e-07, "loss": 0.0041, "num_tokens": 98935950.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7935771942138672, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01697756788054005, "rewards/wordcountpos_reward/raw_geo/std": 0.08075879202426382, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1110.4375, "completions/mean_terminated_length": 1084.4666748046875, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.45429085817163434, "frac_reward_zero_std": 0.0, "grad_norm": 3.6514367677992254, "kl": 0.02337646484375, "learning_rate": 6.978732635761788e-07, "loss": 0.0265, "num_tokens": 98990013.0, "reward": 0.0, "reward_std": 1.0012686252593994, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04158620222130358, "rewards/wordcountpos_reward/raw_geo/std": 0.125703986597287, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1227.9375, "completions/mean_terminated_length": 1165.1539306640625, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.45449089817963595, "frac_reward_zero_std": 0.0, "grad_norm": 3.4365197066381907, "kl": 0.018463134765625, "learning_rate": 6.975764486452274e-07, "loss": 0.0103, "num_tokens": 99031700.0, "reward": 2.9802322387695312e-08, "reward_std": 1.025686264038086, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17096510916954466, "rewards/wordcountpos_reward/raw_geo/std": 0.14305854593567624, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 1030.6875, "completions/mean_terminated_length": 1030.6875, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.4546909381876375, "frac_reward_zero_std": 0.0, "grad_norm": 3.284446199770536, "kl": 0.01898193359375, "learning_rate": 6.972795617553295e-07, "loss": 0.0362, "num_tokens": 99073583.0, "reward": 0.0, "reward_std": 0.8502248525619507, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07474928584759404, "rewards/wordcountpos_reward/raw_geo/std": 0.10594427644600242, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1280.0625, "completions/mean_terminated_length": 1248.6429443359375, "completions/min_length": 1112.0, "completions/min_terminated_length": 1112.0, "epoch": 0.4548909781956391, "frac_reward_zero_std": 0.0, "grad_norm": 2.435841885532149, "kl": 0.01226806640625, "learning_rate": 6.969826030512485e-07, "loss": -0.0175, "num_tokens": 99111272.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7148850560188293, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11166623611917462, "rewards/wordcountpos_reward/raw_geo/std": 0.0628605567382789, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1132.625, "completions/mean_terminated_length": 1132.625, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.45509101820364073, "frac_reward_zero_std": 0.0, "grad_norm": 3.232297369330918, "kl": 0.0167999267578125, "learning_rate": 6.96685572677783e-07, "loss": -0.0031, "num_tokens": 99156106.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9232697486877441, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05322009757564611, "rewards/wordcountpos_reward/raw_geo/std": 0.05930646889457011, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0749073501808141, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1032.1875, "completions/mean_terminated_length": 1032.1875, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.45529105821164234, "frac_reward_zero_std": 0.0, "grad_norm": 3.2568428399554623, "kl": 0.02301025390625, "learning_rate": 6.96388470779766e-07, "loss": -0.0312, "num_tokens": 99188965.0, "reward": 0.0, "reward_std": 0.842793345451355, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2701919235588675, "rewards/wordcountpos_reward/raw_geo/std": 0.17326193009396695, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.054262735320332364, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1202.4375, "completions/mean_terminated_length": 1182.60009765625, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.45549109821964395, "frac_reward_zero_std": 0.0, "grad_norm": 1.844437060776666, "kl": 0.00931549072265625, "learning_rate": 6.960912975020659e-07, "loss": -0.0289, "num_tokens": 99233724.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7929332852363586, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04034183473783693, "rewards/wordcountpos_reward/raw_geo/std": 0.06766520044887803, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1186.0, "completions/mean_terminated_length": 1186.0, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.4556911382276455, "frac_reward_zero_std": 0.0, "grad_norm": 3.084402591136589, "kl": 0.018157958984375, "learning_rate": 6.957940529895858e-07, "loss": -0.0254, "num_tokens": 99272060.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9293816089630127, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.023745208973984716, "rewards/wordcountpos_reward/raw_geo/std": 0.14833148567523802, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1308.0, "completions/mean_terminated_length": 1280.571533203125, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.4558911782356471, "frac_reward_zero_std": 0.0, "grad_norm": 2.1838777998791787, "kl": 0.0157623291015625, "learning_rate": 6.954967373872635e-07, "loss": -0.0147, "num_tokens": 99323236.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0489537715911865, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.22934432382099573, "rewards/wordcountpos_reward/raw_geo/std": 0.1186633997608905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1262.9375, "completions/mean_terminated_length": 1229.071533203125, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.45609121824364873, "frac_reward_zero_std": 0.0, "grad_norm": 3.225672022980248, "kl": 0.020111083984375, "learning_rate": 6.951993508400713e-07, "loss": -0.0484, "num_tokens": 99374451.0, "reward": 7.450580596923828e-09, "reward_std": 1.0368170738220215, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0036041256624472053, "rewards/wordcountpos_reward/raw_geo/std": 0.058365574516431935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 1025.375, "completions/mean_terminated_length": 1025.375, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.45629125825165034, "frac_reward_zero_std": 0.0, "grad_norm": 3.0943378844477314, "kl": 0.0199127197265625, "learning_rate": 6.949018934930165e-07, "loss": -0.0122, "num_tokens": 99427289.0, "reward": 0.0, "reward_std": 0.8252255916595459, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.056144603213588315, "rewards/wordcountpos_reward/raw_geo/std": 0.17026033427542417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 983.875, "completions/mean_terminated_length": 983.875, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.45649129825965196, "frac_reward_zero_std": 0.0, "grad_norm": 2.651569872254715, "kl": 0.011322021484375, "learning_rate": 6.9460436549114e-07, "loss": -0.0139, "num_tokens": 99469943.0, "reward": 7.450580596923828e-09, "reward_std": 1.0207990407943726, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.22905564701254502, "rewards/wordcountpos_reward/raw_geo/std": 0.10888596883321822, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0709720863229836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1212.8125, "completions/mean_terminated_length": 1171.7857666015625, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.4566913382676535, "frac_reward_zero_std": 0.0, "grad_norm": 2.5127067764834345, "kl": 0.0134124755859375, "learning_rate": 6.943067669795185e-07, "loss": -0.0429, "num_tokens": 99519684.0, "reward": -2.9802322387695312e-08, "reward_std": 0.886386513710022, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0359135239555652, "rewards/wordcountpos_reward/raw_geo/std": 0.11245954840179766, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1247.8125, "completions/mean_terminated_length": 1189.615478515625, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.4568913782756551, "frac_reward_zero_std": 0.0, "grad_norm": 3.021410268624662, "kl": 0.0160980224609375, "learning_rate": 6.940090981032621e-07, "loss": 0.0068, "num_tokens": 99556625.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9625469446182251, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0008306396399657924, "rewards/wordcountpos_reward/raw_geo/std": 0.1611796427986057, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1175.25, "completions/mean_terminated_length": 1027.6363525390625, "completions/min_length": 664.0, "completions/min_terminated_length": 664.0, "epoch": 0.45709141828365674, "frac_reward_zero_std": 0.0, "grad_norm": 3.601803185568158, "kl": 0.021697998046875, "learning_rate": 6.937113590075157e-07, "loss": -0.0424, "num_tokens": 99605693.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7223109602928162, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18445599133112647, "rewards/wordcountpos_reward/raw_geo/std": 0.37396052439845867, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.133263870794973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1007.4375, "completions/mean_terminated_length": 974.6000366210938, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.45729145829165835, "frac_reward_zero_std": 0.0, "grad_norm": 2.9033351504598848, "kl": 0.0207672119140625, "learning_rate": 6.934135498374579e-07, "loss": 0.0414, "num_tokens": 99637932.0, "reward": 0.0, "reward_std": 0.6977561116218567, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05464291388087743, "rewards/wordcountpos_reward/raw_geo/std": 0.05274846841478036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1254.0, "completions/mean_terminated_length": 1237.60009765625, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.4574914982996599, "frac_reward_zero_std": 0.0, "grad_norm": 2.5623648956973737, "kl": 0.0128326416015625, "learning_rate": 6.93115670738302e-07, "loss": 0.0325, "num_tokens": 99691564.0, "reward": 0.0, "reward_std": 0.8369123935699463, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08613487157638237, "rewards/wordcountpos_reward/raw_geo/std": 0.07862898516734931, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1086.0, "completions/mean_terminated_length": 1058.4000244140625, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.4576915383076615, "frac_reward_zero_std": 0.0, "grad_norm": 3.293270812642568, "kl": 0.0172576904296875, "learning_rate": 6.928177218552952e-07, "loss": -0.0605, "num_tokens": 99736108.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9904809594154358, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07206164715793408, "rewards/wordcountpos_reward/raw_geo/std": 0.07663500514362555, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1196.0625, "completions/mean_terminated_length": 1175.800048828125, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.45789157831566313, "frac_reward_zero_std": 0.0, "grad_norm": 2.7299714901335346, "kl": 0.01458740234375, "learning_rate": 6.925197033337186e-07, "loss": -0.0013, "num_tokens": 99785109.0, "reward": 0.0, "reward_std": 1.0121835470199585, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04405333156850968, "rewards/wordcountpos_reward/raw_geo/std": 0.15769481281598838, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1149.5, "completions/mean_terminated_length": 1149.5, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.45809161832366474, "frac_reward_zero_std": 0.0, "grad_norm": 3.0258981442520425, "kl": 0.0173797607421875, "learning_rate": 6.922216153188877e-07, "loss": 0.0183, "num_tokens": 99829349.0, "reward": 0.0, "reward_std": 1.0053064823150635, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03774113179955885, "rewards/wordcountpos_reward/raw_geo/std": 0.1413320541126201, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1250.875, "completions/mean_terminated_length": 1234.2667236328125, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.45829165833166635, "frac_reward_zero_std": 0.0, "grad_norm": 3.278997068156578, "kl": 0.01904296875, "learning_rate": 6.919234579561511e-07, "loss": 0.0354, "num_tokens": 99880499.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8817808628082275, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07032423723576618, "rewards/wordcountpos_reward/raw_geo/std": 0.1387410166238783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 986.875, "completions/mean_terminated_length": 986.875, "completions/min_length": 479.0, "completions/min_terminated_length": 479.0, "epoch": 0.4584916983396679, "frac_reward_zero_std": 0.0, "grad_norm": 3.1510504628326452, "kl": 0.021087646484375, "learning_rate": 6.916252313908922e-07, "loss": -0.0884, "num_tokens": 99912921.0, "reward": 0.0, "reward_std": 0.9295998811721802, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21744384839343123, "rewards/wordcountpos_reward/raw_geo/std": 0.24240232143978588, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.12816366850994057, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1174.6875, "completions/mean_terminated_length": 1174.6875, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.4586917383476695, "frac_reward_zero_std": 0.0, "grad_norm": 3.3675843654301945, "kl": 0.0220947265625, "learning_rate": 6.913269357685271e-07, "loss": 0.008, "num_tokens": 99965364.0, "reward": 0.0, "reward_std": 0.9654335379600525, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19580058774495102, "rewards/wordcountpos_reward/raw_geo/std": 0.11192653283536519, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1125.3125, "completions/mean_terminated_length": 1100.3333740234375, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.45889177835567113, "frac_reward_zero_std": 0.0, "grad_norm": 3.0999805062404167, "kl": 0.0166473388671875, "learning_rate": 6.910285712345065e-07, "loss": 0.0013, "num_tokens": 100010081.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8592716455459595, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011328093057757175, "rewards/wordcountpos_reward/raw_geo/std": 0.11669154651046298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1274.0, "completions/mean_terminated_length": 1221.84619140625, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.45909181836367274, "frac_reward_zero_std": 0.0, "grad_norm": 3.1453007758910667, "kl": 0.017913818359375, "learning_rate": 6.907301379343143e-07, "loss": -0.0107, "num_tokens": 100062713.0, "reward": 0.0, "reward_std": 0.9131453633308411, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061548486572041254, "rewards/wordcountpos_reward/raw_geo/std": 0.07707726517025236, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639732, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1187.875, "completions/mean_terminated_length": 1115.84619140625, "completions/min_length": 637.0, "completions/min_terminated_length": 637.0, "epoch": 0.45929185837167436, "frac_reward_zero_std": 0.0, "grad_norm": 2.6474077253801442, "kl": 0.0149993896484375, "learning_rate": 6.904316360134676e-07, "loss": -0.0003, "num_tokens": 100108607.0, "reward": 5.960464477539063e-08, "reward_std": 0.7110500335693359, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12216034836944852, "rewards/wordcountpos_reward/raw_geo/std": 0.02880471439202467, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1174.125, "completions/mean_terminated_length": 1152.4000244140625, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.4594918983796759, "frac_reward_zero_std": 0.0, "grad_norm": 3.2020544606826733, "kl": 0.017242431640625, "learning_rate": 6.901330656175176e-07, "loss": -0.0113, "num_tokens": 100153681.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0032144784927368, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.007972814292849371, "rewards/wordcountpos_reward/raw_geo/std": 0.14209134678306837, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1203.9375, "completions/mean_terminated_length": 973.6666870117188, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.4596919383876775, "frac_reward_zero_std": 0.0, "grad_norm": 3.4448540667647056, "kl": 0.02325439453125, "learning_rate": 6.898344268920483e-07, "loss": -0.0183, "num_tokens": 100202872.0, "reward": -7.450580596923828e-09, "reward_std": 1.0455693006515503, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.018872495145158024, "rewards/wordcountpos_reward/raw_geo/std": 0.03733085446102431, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1074.625, "completions/mean_terminated_length": 1013.857177734375, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.45989197839567914, "frac_reward_zero_std": 0.0, "grad_norm": 2.6257719500410936, "kl": 0.010410308837890625, "learning_rate": 6.895357199826777e-07, "loss": 0.031, "num_tokens": 100244130.0, "reward": 0.0, "reward_std": 1.0632362365722656, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.019165714590085765, "rewards/wordcountpos_reward/raw_geo/std": 0.038268527136876444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1290.625, "completions/mean_terminated_length": 1242.3077392578125, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.46009201840368075, "frac_reward_zero_std": 0.0, "grad_norm": 3.185976130129856, "kl": 0.01788330078125, "learning_rate": 6.892369450350562e-07, "loss": -0.0395, "num_tokens": 100296228.0, "reward": 0.0, "reward_std": 0.9163253307342529, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.010908755841303571, "rewards/wordcountpos_reward/raw_geo/std": 0.12894314177245575, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1156.8125, "completions/mean_terminated_length": 1156.8125, "completions/min_length": 1065.0, "completions/min_terminated_length": 1065.0, "epoch": 0.46029205841168236, "frac_reward_zero_std": 0.0, "grad_norm": 2.93907085082956, "kl": 0.0137176513671875, "learning_rate": 6.889381021948682e-07, "loss": -0.0168, "num_tokens": 100335809.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5709009170532227, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04780792778692805, "rewards/wordcountpos_reward/raw_geo/std": 0.21658103846629206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1197.25, "completions/mean_terminated_length": 1177.0667724609375, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.4604920984196839, "frac_reward_zero_std": 0.0, "grad_norm": 3.6032165148353315, "kl": 0.02392578125, "learning_rate": 6.886391916078307e-07, "loss": 0.0168, "num_tokens": 100381957.0, "reward": 4.470348358154297e-08, "reward_std": 0.9448738098144531, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.037560484776970035, "rewards/wordcountpos_reward/raw_geo/std": 0.1023956400459463, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1289.375, "completions/mean_terminated_length": 1240.769287109375, "completions/min_length": 1096.0, "completions/min_terminated_length": 1096.0, "epoch": 0.4606921384276855, "frac_reward_zero_std": 0.0, "grad_norm": 2.9340807973439604, "kl": 0.0157318115234375, "learning_rate": 6.883402134196938e-07, "loss": 0.0217, "num_tokens": 100421699.0, "reward": -5.960464477539063e-08, "reward_std": 0.34167003631591797, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014312600560326856, "rewards/wordcountpos_reward/raw_geo/std": 0.20756966925447454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1087.1875, "completions/mean_terminated_length": 1087.1875, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.46089217843568714, "frac_reward_zero_std": 0.0, "grad_norm": 3.3645296835013427, "kl": 0.026458740234375, "learning_rate": 6.880411677762405e-07, "loss": -0.0385, "num_tokens": 100464502.0, "reward": 0.0, "reward_std": 0.9332068562507629, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.020483612721728356, "rewards/wordcountpos_reward/raw_geo/std": 0.10007609277178563, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1388.0625, "completions/mean_terminated_length": 1244.1429443359375, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.46109221844368875, "frac_reward_zero_std": 0.0, "grad_norm": 2.887425401475873, "kl": 0.0170745849609375, "learning_rate": 6.877420548232871e-07, "loss": 0.0097, "num_tokens": 100516271.0, "reward": 0.0, "reward_std": 0.6885614991188049, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1184576717015249, "rewards/wordcountpos_reward/raw_geo/std": 0.06650472367257006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978232, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 1048.9375, "completions/mean_terminated_length": 1048.9375, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.46129225845169036, "frac_reward_zero_std": 0.0, "grad_norm": 3.402773661449962, "kl": 0.015350341796875, "learning_rate": 6.874428747066823e-07, "loss": 0.0234, "num_tokens": 100565358.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4694614112377167, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019861456979166216, "rewards/wordcountpos_reward/raw_geo/std": 0.07757692491989218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1379.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1131.8125, "completions/mean_terminated_length": 1131.8125, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.4614922984596919, "frac_reward_zero_std": 0.0, "grad_norm": 3.3352448250067264, "kl": 0.01690673828125, "learning_rate": 6.871436275723076e-07, "loss": -0.0229, "num_tokens": 100606811.0, "reward": 1.862645149230957e-08, "reward_std": 1.0255597829818726, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14738706404223953, "rewards/wordcountpos_reward/raw_geo/std": 0.17769642014212444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1186.625, "completions/mean_terminated_length": 1141.857177734375, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.46169233846769353, "frac_reward_zero_std": 0.0, "grad_norm": 3.194085646691492, "kl": 0.022003173828125, "learning_rate": 6.868443135660774e-07, "loss": -0.0041, "num_tokens": 100650421.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0115604400634766, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09825899237572917, "rewards/wordcountpos_reward/raw_geo/std": 0.43672601048818, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1037.625, "completions/mean_terminated_length": 1037.625, "completions/min_length": 703.0, "completions/min_terminated_length": 703.0, "epoch": 0.46189237847569514, "frac_reward_zero_std": 0.0, "grad_norm": 3.0100510434348675, "kl": 0.015869140625, "learning_rate": 6.865449328339384e-07, "loss": 0.0231, "num_tokens": 100696463.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9191683530807495, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.005792130292250776, "rewards/wordcountpos_reward/raw_geo/std": 0.08253307458830077, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505425, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1243.0625, "completions/mean_terminated_length": 1183.769287109375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.46209241848369675, "frac_reward_zero_std": 0.0, "grad_norm": 3.553650039166501, "kl": 0.020050048828125, "learning_rate": 6.862454855218703e-07, "loss": 0.0374, "num_tokens": 100753712.0, "reward": 0.0, "reward_std": 0.8869396448135376, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14326074164588873, "rewards/wordcountpos_reward/raw_geo/std": 0.20252440489112863, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1107.3125, "completions/mean_terminated_length": 1107.3125, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.46229245849169837, "frac_reward_zero_std": 0.0, "grad_norm": 2.4247445932032403, "kl": 0.0122528076171875, "learning_rate": 6.859459717758847e-07, "loss": -0.0046, "num_tokens": 100798789.0, "reward": 0.0, "reward_std": 0.9724727272987366, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10096032829258544, "rewards/wordcountpos_reward/raw_geo/std": 0.08093953538792079, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1010.75, "completions/mean_terminated_length": 940.857177734375, "completions/min_length": 640.0, "completions/min_terminated_length": 640.0, "epoch": 0.4624924984996999, "frac_reward_zero_std": 0.0, "grad_norm": 3.3352636424644877, "kl": 0.01788330078125, "learning_rate": 6.856463917420258e-07, "loss": -0.0357, "num_tokens": 100832185.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9914563894271851, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12904168358376428, "rewards/wordcountpos_reward/raw_geo/std": 0.24105470693862135, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1252.375, "completions/mean_terminated_length": 1235.86669921875, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.46269253850770153, "frac_reward_zero_std": 0.0, "grad_norm": 3.2087212823590625, "kl": 0.020904541015625, "learning_rate": 6.853467455663705e-07, "loss": -0.041, "num_tokens": 100878799.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9508813619613647, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.029695300782578028, "rewards/wordcountpos_reward/raw_geo/std": 0.08818980720023441, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1254.5625, "completions/mean_terminated_length": 1238.2000732421875, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.46289257851570315, "frac_reward_zero_std": 0.0, "grad_norm": 3.0827880827172947, "kl": 0.018951416015625, "learning_rate": 6.850470333950274e-07, "loss": -0.0328, "num_tokens": 100929328.0, "reward": 0.0, "reward_std": 0.8211725950241089, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14429788404803792, "rewards/wordcountpos_reward/raw_geo/std": 0.09035340251283032, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1269.1875, "completions/mean_terminated_length": 1269.1875, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.46309261852370476, "frac_reward_zero_std": 0.0, "grad_norm": 2.8675336315016016, "kl": 0.0139617919921875, "learning_rate": 6.847472553741376e-07, "loss": -0.0351, "num_tokens": 100973723.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7945818901062012, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0026366875575865346, "rewards/wordcountpos_reward/raw_geo/std": 0.2015969220012403, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1157.4375, "completions/mean_terminated_length": 1134.60009765625, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.4632926585317063, "frac_reward_zero_std": 0.0, "grad_norm": 3.2868672211888494, "kl": 0.0201416015625, "learning_rate": 6.844474116498742e-07, "loss": -0.0246, "num_tokens": 101026930.0, "reward": 0.0, "reward_std": 0.48942065238952637, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17668309557159384, "rewards/wordcountpos_reward/raw_geo/std": 0.15319415669094857, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1392.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1071.75, "completions/mean_terminated_length": 1071.75, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.4634926985397079, "frac_reward_zero_std": 0.0, "grad_norm": 3.2022766512788476, "kl": 0.0182647705078125, "learning_rate": 6.841475023684425e-07, "loss": -0.0192, "num_tokens": 101068622.0, "reward": 0.0, "reward_std": 1.0000957250595093, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04619967936900398, "rewards/wordcountpos_reward/raw_geo/std": 0.12297870438698962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1299.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 1113.0, "completions/mean_terminated_length": 1113.0, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.46369273854770954, "frac_reward_zero_std": 0.0, "grad_norm": 2.701150802561792, "kl": 0.012451171875, "learning_rate": 6.838475276760792e-07, "loss": 0.025, "num_tokens": 101112334.0, "reward": -7.450580596923828e-09, "reward_std": 1.0397217273712158, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.06734335702972355, "rewards/wordcountpos_reward/raw_geo/std": 0.07653031834189714, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1110.5, "completions/mean_terminated_length": 1110.5, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.46389277855571115, "frac_reward_zero_std": 0.0, "grad_norm": 3.6178710849564433, "kl": 0.0201416015625, "learning_rate": 6.835474877190539e-07, "loss": 0.0143, "num_tokens": 101154838.0, "reward": -1.4901161193847656e-08, "reward_std": 1.043917179107666, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06789166062250603, "rewards/wordcountpos_reward/raw_geo/std": 0.17682738538636653, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1181.9375, "completions/mean_terminated_length": 1181.9375, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.46409281856371276, "frac_reward_zero_std": 0.0, "grad_norm": 3.3718192333003265, "kl": 0.02008056640625, "learning_rate": 6.83247382643667e-07, "loss": 0.0193, "num_tokens": 101193469.0, "reward": 0.0, "reward_std": 0.6534216403961182, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.029120015189343235, "rewards/wordcountpos_reward/raw_geo/std": 0.04845167095353535, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1064.4375, "completions/mean_terminated_length": 1002.21435546875, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.4642928585717143, "frac_reward_zero_std": 0.0, "grad_norm": 3.268960369888268, "kl": 0.0208740234375, "learning_rate": 6.829472125962512e-07, "loss": 0.0131, "num_tokens": 101234156.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9710407257080078, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15388267392636876, "rewards/wordcountpos_reward/raw_geo/std": 0.18116019429220717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1126.875, "completions/mean_terminated_length": 1102.0, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.46449289857971593, "frac_reward_zero_std": 0.0, "grad_norm": 3.59118257256825, "kl": 0.021453857421875, "learning_rate": 6.826469777231711e-07, "loss": 0.0114, "num_tokens": 101284890.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7182416915893555, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.006234259116003603, "rewards/wordcountpos_reward/raw_geo/std": 0.028087664962092296, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1120.0, "completions/mean_terminated_length": 1032.3077392578125, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.46469293858771754, "frac_reward_zero_std": 0.0, "grad_norm": 3.019564681938597, "kl": 0.0178680419921875, "learning_rate": 6.82346678170822e-07, "loss": 0.0029, "num_tokens": 101335962.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8574622869491577, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16172069972662442, "rewards/wordcountpos_reward/raw_geo/std": 0.08923550041855415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1057600358603626, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1441.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1057.8125, "completions/mean_terminated_length": 1057.8125, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.46489297859571915, "frac_reward_zero_std": 0.0, "grad_norm": 2.5385844025356534, "kl": 0.01324462890625, "learning_rate": 6.820463140856319e-07, "loss": -0.0189, "num_tokens": 101378407.0, "reward": -7.450580596923828e-09, "reward_std": 1.0621157884597778, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.041408106928693664, "rewards/wordcountpos_reward/raw_geo/std": 0.12137223807174986, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252809, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1275.9375, "completions/mean_terminated_length": 1243.9285888671875, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.46509301860372076, "frac_reward_zero_std": 0.0, "grad_norm": 3.0254872319601778, "kl": 0.021514892578125, "learning_rate": 6.817458856140594e-07, "loss": 0.0057, "num_tokens": 101430814.0, "reward": 0.0, "reward_std": 0.9314199686050415, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2651521688554516, "rewards/wordcountpos_reward/raw_geo/std": 0.2163471724712733, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1137.75, "completions/mean_terminated_length": 1113.60009765625, "completions/min_length": 629.0, "completions/min_terminated_length": 629.0, "epoch": 0.4652930586117223, "frac_reward_zero_std": 0.0, "grad_norm": 3.1278407863181545, "kl": 0.013397216796875, "learning_rate": 6.814453929025949e-07, "loss": 0.0112, "num_tokens": 101470314.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9881935119628906, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14075107277083526, "rewards/wordcountpos_reward/raw_geo/std": 0.21861952888994007, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619461, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1274.125, "completions/mean_terminated_length": 1198.8333740234375, "completions/min_length": 1038.0, "completions/min_terminated_length": 1038.0, "epoch": 0.46549309861972393, "frac_reward_zero_std": 0.0, "grad_norm": 2.4135454706459636, "kl": 0.0126953125, "learning_rate": 6.811448360977596e-07, "loss": 0.0071, "num_tokens": 101512644.0, "reward": 0.0, "reward_std": 0.9764894247055054, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01592865689589068, "rewards/wordcountpos_reward/raw_geo/std": 0.07779490770252223, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1139.125, "completions/mean_terminated_length": 1087.571533203125, "completions/min_length": 784.0, "completions/min_terminated_length": 784.0, "epoch": 0.46569313862772554, "frac_reward_zero_std": 0.0, "grad_norm": 3.014101011569411, "kl": 0.0171966552734375, "learning_rate": 6.808442153461071e-07, "loss": -0.0543, "num_tokens": 101557878.0, "reward": 2.9802322387695312e-08, "reward_std": 0.833800196647644, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0031213001554966403, "rewards/wordcountpos_reward/raw_geo/std": 0.008547357294898479, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1171.375, "completions/mean_terminated_length": 1171.375, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.46589317863572716, "frac_reward_zero_std": 0.0, "grad_norm": 2.346053100031601, "kl": 0.0124969482421875, "learning_rate": 6.805435307942209e-07, "loss": -0.0177, "num_tokens": 101598652.0, "reward": 0.0, "reward_std": 0.8635673522949219, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1498745535070764, "rewards/wordcountpos_reward/raw_geo/std": 0.15219332830475374, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1001.125, "completions/mean_terminated_length": 1001.125, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.46609321864372877, "frac_reward_zero_std": 0.0, "grad_norm": 2.7776686885506576, "kl": 0.0141143798828125, "learning_rate": 6.802427825887162e-07, "loss": -0.024, "num_tokens": 101638654.0, "reward": -2.9802322387695312e-08, "reward_std": 1.016717553138733, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.007327509476222904, "rewards/wordcountpos_reward/raw_geo/std": 0.0493209928772177, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1239.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 1084.25, "completions/mean_terminated_length": 1084.25, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.4662932586517303, "frac_reward_zero_std": 0.0, "grad_norm": 2.6832521588056064, "kl": 0.0132904052734375, "learning_rate": 6.799419708762393e-07, "loss": -0.0338, "num_tokens": 101668066.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0383212566375732, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.018135048233895668, "rewards/wordcountpos_reward/raw_geo/std": 0.043075514408823276, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1196.125, "completions/mean_terminated_length": 1196.125, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.46649329865973194, "frac_reward_zero_std": 0.0, "grad_norm": 2.896134462615482, "kl": 0.018646240234375, "learning_rate": 6.796410958034674e-07, "loss": -0.0252, "num_tokens": 101711124.0, "reward": 0.0, "reward_std": 0.9482857584953308, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0214821002350095, "rewards/wordcountpos_reward/raw_geo/std": 0.049311566937643686, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1074.0, "completions/max_terminated_length": 1074.0, "completions/mean_length": 998.3125, "completions/mean_terminated_length": 998.3125, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.46669333866773355, "frac_reward_zero_std": 0.0, "grad_norm": 1.9377004260529709, "kl": 0.011199951171875, "learning_rate": 6.793401575171085e-07, "loss": -0.0013, "num_tokens": 101749441.0, "reward": 0.0, "reward_std": 0.8245730400085449, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14234482520671807, "rewards/wordcountpos_reward/raw_geo/std": 0.05593587083081882, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1075.0, "completions/max_terminated_length": 1075.0, "completions/mean_length": 882.8125, "completions/mean_terminated_length": 882.8125, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.46689337867573516, "frac_reward_zero_std": 0.0, "grad_norm": 2.7466943145916596, "kl": 0.007785797119140625, "learning_rate": 6.790391561639015e-07, "loss": 0.0118, "num_tokens": 101782574.0, "reward": -5.960464477539063e-08, "reward_std": 1.0078169107437134, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06897733620897702, "rewards/wordcountpos_reward/raw_geo/std": 0.11440335208784733, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1114.1875, "completions/mean_terminated_length": 1114.1875, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.46709341868373677, "frac_reward_zero_std": 0.0, "grad_norm": 3.3100432209878536, "kl": 0.01751708984375, "learning_rate": 6.78738091890616e-07, "loss": -0.0334, "num_tokens": 101818617.0, "reward": -2.9802322387695312e-08, "reward_std": 0.844244658946991, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.287181129430126, "rewards/wordcountpos_reward/raw_geo/std": 0.3020786748656309, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1186.8125, "completions/mean_terminated_length": 1186.8125, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.46729345869173833, "frac_reward_zero_std": 0.0, "grad_norm": 2.915373124111391, "kl": 0.01788330078125, "learning_rate": 6.784369648440523e-07, "loss": -0.0056, "num_tokens": 101866686.0, "reward": 0.0, "reward_std": 0.801729142665863, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.036736529877044524, "rewards/wordcountpos_reward/raw_geo/std": 0.06726897083964459, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1349.3125, "completions/mean_terminated_length": 1314.5384521484375, "completions/min_length": 1163.0, "completions/min_terminated_length": 1163.0, "epoch": 0.46749349869973994, "frac_reward_zero_std": 0.0, "grad_norm": 2.790738615026571, "kl": 0.019622802734375, "learning_rate": 6.781357751710413e-07, "loss": 0.0031, "num_tokens": 101915843.0, "reward": 0.0, "reward_std": 0.9339981079101562, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2851088971725883, "rewards/wordcountpos_reward/raw_geo/std": 0.11815639739311432, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1040.6875, "completions/mean_terminated_length": 1010.0667114257812, "completions/min_length": 721.0, "completions/min_terminated_length": 721.0, "epoch": 0.46769353870774155, "frac_reward_zero_std": 0.0, "grad_norm": 3.1308045334531904, "kl": 0.0164947509765625, "learning_rate": 6.778345230184444e-07, "loss": 0.0096, "num_tokens": 101960142.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9505513310432434, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16205292863142273, "rewards/wordcountpos_reward/raw_geo/std": 0.26688240655003304, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476839, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1222.875, "completions/mean_terminated_length": 1158.923095703125, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.46789357871574316, "frac_reward_zero_std": 0.0, "grad_norm": 2.714142434285546, "kl": 0.01336669921875, "learning_rate": 6.775332085331537e-07, "loss": 0.0355, "num_tokens": 102011700.0, "reward": -1.862645149230957e-08, "reward_std": 1.0650596618652344, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004316112986449131, "rewards/wordcountpos_reward/raw_geo/std": 0.06821406778848593, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1049.5625, "completions/mean_terminated_length": 1049.5625, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.4680936187237448, "frac_reward_zero_std": 0.0, "grad_norm": 3.7533191472292895, "kl": 0.0186767578125, "learning_rate": 6.772318318620913e-07, "loss": -0.0231, "num_tokens": 102063269.0, "reward": 0.0, "reward_std": 0.9972120523452759, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09516233285654586, "rewards/wordcountpos_reward/raw_geo/std": 0.10895705678013667, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1246476515504285, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1264.8125, "completions/mean_terminated_length": 1081.888916015625, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.46829365873174633, "frac_reward_zero_std": 0.0, "grad_norm": 2.783928406232991, "kl": 0.013946533203125, "learning_rate": 6.769303931522101e-07, "loss": -0.018, "num_tokens": 102114922.0, "reward": -3.725290298461914e-09, "reward_std": 1.0141347646713257, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.18700669970173348, "rewards/wordcountpos_reward/raw_geo/std": 0.15782938701819588, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620105, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 981.125, "completions/mean_terminated_length": 981.125, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.46849369873974794, "frac_reward_zero_std": 0.0, "grad_norm": 2.8870986951844637, "kl": 0.018280029296875, "learning_rate": 6.766288925504928e-07, "loss": -0.0065, "num_tokens": 102145572.0, "reward": 3.725290298461914e-09, "reward_std": 1.0360441207885742, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.052165068695782926, "rewards/wordcountpos_reward/raw_geo/std": 0.060684055812496765, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202954, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1172.5, "completions/mean_terminated_length": 1125.71435546875, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.46869373874774956, "frac_reward_zero_std": 0.0, "grad_norm": 2.405980036931825, "kl": 0.0110931396484375, "learning_rate": 6.763273302039524e-07, "loss": -0.011, "num_tokens": 102183652.0, "reward": 3.725290298461914e-09, "reward_std": 1.029883861541748, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.033411224430648256, "rewards/wordcountpos_reward/raw_geo/std": 0.04201919492025664, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1370.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1023.375, "completions/mean_terminated_length": 1023.375, "completions/min_length": 581.0, "completions/min_terminated_length": 581.0, "epoch": 0.46889377875575117, "frac_reward_zero_std": 0.0, "grad_norm": 2.4458684909221033, "kl": 0.0124969482421875, "learning_rate": 6.760257062596322e-07, "loss": 0.0006, "num_tokens": 102219610.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9049181938171387, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17173411203593997, "rewards/wordcountpos_reward/raw_geo/std": 0.21293357557206546, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567835, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1273.0, "completions/max_terminated_length": 1273.0, "completions/mean_length": 1002.125, "completions/mean_terminated_length": 1002.125, "completions/min_length": 584.0, "completions/min_terminated_length": 584.0, "epoch": 0.4690938187637527, "frac_reward_zero_std": 0.0, "grad_norm": 3.6865448080855474, "kl": 0.0213623046875, "learning_rate": 6.757240208646053e-07, "loss": -0.05, "num_tokens": 102249708.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6179808974266052, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08159348976896039, "rewards/wordcountpos_reward/raw_geo/std": 0.04504415174076927, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1143.375, "completions/mean_terminated_length": 1119.60009765625, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.46929385877175434, "frac_reward_zero_std": 0.0, "grad_norm": 3.1611612313324393, "kl": 0.01947021484375, "learning_rate": 6.75422274165975e-07, "loss": 0.0567, "num_tokens": 102292730.0, "reward": 0.0, "reward_std": 0.9414613842964172, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02673673609719681, "rewards/wordcountpos_reward/raw_geo/std": 0.06788833008066747, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045816, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1171.3125, "completions/mean_terminated_length": 1171.3125, "completions/min_length": 1083.0, "completions/min_terminated_length": 1083.0, "epoch": 0.46949389877975595, "frac_reward_zero_std": 0.0, "grad_norm": 2.8337690281779335, "kl": 0.0137176513671875, "learning_rate": 6.751204663108741e-07, "loss": 0.0037, "num_tokens": 102331807.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0685477256774902, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05038561008280934, "rewards/wordcountpos_reward/raw_geo/std": 0.0892248903834783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1312.375, "completions/mean_terminated_length": 1299.86669921875, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.46969393878775756, "frac_reward_zero_std": 0.0, "grad_norm": 2.956080376692367, "kl": 0.018768310546875, "learning_rate": 6.74818597446466e-07, "loss": -0.0347, "num_tokens": 102385205.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9685295820236206, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.021959790974736392, "rewards/wordcountpos_reward/raw_geo/std": 0.3734807613770614, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000303, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1233.375, "completions/mean_terminated_length": 1233.375, "completions/min_length": 1009.0, "completions/min_terminated_length": 1009.0, "epoch": 0.46989397879575917, "frac_reward_zero_std": 0.0, "grad_norm": 2.781118506601174, "kl": 0.0179595947265625, "learning_rate": 6.745166677199426e-07, "loss": -0.0326, "num_tokens": 102427067.0, "reward": 0.0, "reward_std": 0.5157052874565125, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21715063451296235, "rewards/wordcountpos_reward/raw_geo/std": 0.08594438446720502, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1251.3125, "completions/mean_terminated_length": 1234.7333984375, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.4700940188037607, "frac_reward_zero_std": 0.0, "grad_norm": 2.3058447196087823, "kl": 0.0147552490234375, "learning_rate": 6.742146772785269e-07, "loss": -0.0355, "num_tokens": 102480016.0, "reward": 2.9802322387695312e-08, "reward_std": 0.624720573425293, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020810350710518545, "rewards/wordcountpos_reward/raw_geo/std": 0.07640557969765856, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1062.125, "completions/mean_terminated_length": 1062.125, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.47029405881176234, "frac_reward_zero_std": 0.0, "grad_norm": 3.1848848995984578, "kl": 0.01641845703125, "learning_rate": 6.739126262694703e-07, "loss": -0.0312, "num_tokens": 102513234.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9314665198326111, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09440448455431075, "rewards/wordcountpos_reward/raw_geo/std": 0.11202848190596737, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 1059.6875, "completions/mean_terminated_length": 1059.6875, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.47049409881976395, "frac_reward_zero_std": 0.0, "grad_norm": 2.168199342481158, "kl": 0.01146697998046875, "learning_rate": 6.736105148400547e-07, "loss": -0.0006, "num_tokens": 102560853.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0189063549041748, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16871257357864428, "rewards/wordcountpos_reward/raw_geo/std": 0.10913650234664805, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.17716909687891083, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1177.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 853.75, "completions/mean_terminated_length": 853.75, "completions/min_length": 664.0, "completions/min_terminated_length": 664.0, "epoch": 0.47069413882776556, "frac_reward_zero_std": 0.0, "grad_norm": 3.2640393150150584, "kl": 0.0203857421875, "learning_rate": 6.733083431375905e-07, "loss": 0.044, "num_tokens": 102588857.0, "reward": 0.0, "reward_std": 0.6373158693313599, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.049285487610219725, "rewards/wordcountpos_reward/raw_geo/std": 0.09599329467993438, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1134.0625, "completions/mean_terminated_length": 1049.615478515625, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.4708941788357672, "frac_reward_zero_std": 0.0, "grad_norm": 3.1143574663206963, "kl": 0.0174407958984375, "learning_rate": 6.730061113094184e-07, "loss": 0.0314, "num_tokens": 102643914.0, "reward": 0.0, "reward_std": 0.7355717420578003, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006745454834656767, "rewards/wordcountpos_reward/raw_geo/std": 0.10906786487375146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1182.375, "completions/mean_terminated_length": 1137.0, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.47109421884376873, "frac_reward_zero_std": 0.0, "grad_norm": 2.742339640329466, "kl": 0.016632080078125, "learning_rate": 6.72703819502908e-07, "loss": -0.0222, "num_tokens": 102679448.0, "reward": 0.0, "reward_std": 0.8075650334358215, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0582115164871361, "rewards/wordcountpos_reward/raw_geo/std": 0.11187535633698514, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1167.0, "completions/max_terminated_length": 1167.0, "completions/mean_length": 1063.875, "completions/mean_terminated_length": 1063.875, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.47129425885177034, "frac_reward_zero_std": 0.0, "grad_norm": 2.968049929396847, "kl": 0.0142974853515625, "learning_rate": 6.724014678654578e-07, "loss": 0.0054, "num_tokens": 102720846.0, "reward": -5.960464477539063e-08, "reward_std": 0.9385375380516052, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04678408437431443, "rewards/wordcountpos_reward/raw_geo/std": 0.06876756623321238, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1160.125, "completions/mean_terminated_length": 1160.125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.47149429885977195, "frac_reward_zero_std": 0.0, "grad_norm": 2.6525245180548764, "kl": 0.014892578125, "learning_rate": 6.720990565444961e-07, "loss": -0.0307, "num_tokens": 102757424.0, "reward": 0.0, "reward_std": 0.729367733001709, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.024485046065706485, "rewards/wordcountpos_reward/raw_geo/std": 0.03641919786423794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1368.0625, "completions/mean_terminated_length": 1236.125, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.47169433886777357, "frac_reward_zero_std": 0.0, "grad_norm": 2.6725913064541613, "kl": 0.01263427734375, "learning_rate": 6.7179658568748e-07, "loss": 0.0136, "num_tokens": 102811097.0, "reward": -7.450580596923828e-09, "reward_std": 0.9978973865509033, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.06403763679234466, "rewards/wordcountpos_reward/raw_geo/std": 0.058152162220062864, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14343665526661614, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1294.625, "completions/mean_terminated_length": 1280.933349609375, "completions/min_length": 1109.0, "completions/min_terminated_length": 1109.0, "epoch": 0.4718943788757752, "frac_reward_zero_std": 0.0, "grad_norm": 3.447149392007163, "kl": 0.021148681640625, "learning_rate": 6.714940554418959e-07, "loss": -0.0141, "num_tokens": 102863203.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5628910064697266, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04847681580521633, "rewards/wordcountpos_reward/raw_geo/std": 0.23249517241648637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1172998689652263, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1055.75, "completions/mean_terminated_length": 1055.75, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.47209441888377673, "frac_reward_zero_std": 0.0, "grad_norm": 2.9812046539783763, "kl": 0.0147705078125, "learning_rate": 6.711914659552582e-07, "loss": -0.0034, "num_tokens": 102908439.0, "reward": 0.0, "reward_std": 0.8713029623031616, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015648651639645034, "rewards/wordcountpos_reward/raw_geo/std": 0.09171781084207496, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14700718047466632, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1188.3125, "completions/mean_terminated_length": 1167.533447265625, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.47229445889177835, "frac_reward_zero_std": 0.0, "grad_norm": 2.4226726052177754, "kl": 0.0133056640625, "learning_rate": 6.708888173751117e-07, "loss": -0.0122, "num_tokens": 102955756.0, "reward": -5.960464477539063e-08, "reward_std": 0.8376365900039673, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06317670418221667, "rewards/wordcountpos_reward/raw_geo/std": 0.1585458214240526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1423.375, "completions/mean_terminated_length": 1346.75, "completions/min_length": 1270.0, "completions/min_terminated_length": 1270.0, "epoch": 0.47249449889977996, "frac_reward_zero_std": 0.0, "grad_norm": 2.793179390775585, "kl": 0.01763916015625, "learning_rate": 6.705861098490289e-07, "loss": -0.0005, "num_tokens": 103009250.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0607126951217651, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.304967566755184, "rewards/wordcountpos_reward/raw_geo/std": 0.20462994468015558, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115676, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1074.8125, "completions/mean_terminated_length": 1074.8125, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.47269453890778157, "frac_reward_zero_std": 0.0, "grad_norm": 2.8345998435138284, "kl": 0.0174560546875, "learning_rate": 6.702833435246112e-07, "loss": 0.0125, "num_tokens": 103054631.0, "reward": 0.0, "reward_std": 0.8408463001251221, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.091572139704682, "rewards/wordcountpos_reward/raw_geo/std": 0.11232257053295337, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 1043.125, "completions/mean_terminated_length": 1012.666748046875, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.4728945789157832, "frac_reward_zero_std": 0.0, "grad_norm": 3.400872321822032, "kl": 0.01953125, "learning_rate": 6.699805185494891e-07, "loss": 0.0132, "num_tokens": 103091313.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9636108875274658, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.061253961675745, "rewards/wordcountpos_reward/raw_geo/std": 0.05539342998630899, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1289.1875, "completions/mean_terminated_length": 1078.375, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.47309461892378474, "frac_reward_zero_std": 0.0, "grad_norm": 2.1453206519828987, "kl": 0.009033203125, "learning_rate": 6.696776350713213e-07, "loss": 0.0108, "num_tokens": 103135868.0, "reward": 0.0, "reward_std": 0.6380887627601624, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2659528135358337, "rewards/wordcountpos_reward/raw_geo/std": 0.3381826759274157, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1081.5625, "completions/mean_terminated_length": 1081.5625, "completions/min_length": 739.0, "completions/min_terminated_length": 739.0, "epoch": 0.47329465893178635, "frac_reward_zero_std": 0.0, "grad_norm": 3.1475946001109096, "kl": 0.015625, "learning_rate": 6.693746932377953e-07, "loss": -0.0315, "num_tokens": 103178549.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0657330751419067, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006307745142625035, "rewards/wordcountpos_reward/raw_geo/std": 0.26059574900326554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1600347184554374, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1090.1875, "completions/mean_terminated_length": 1090.1875, "completions/min_length": 710.0, "completions/min_terminated_length": 710.0, "epoch": 0.47349469893978796, "frac_reward_zero_std": 0.0, "grad_norm": 3.790858385196293, "kl": 0.017974853515625, "learning_rate": 6.690716931966267e-07, "loss": -0.085, "num_tokens": 103229912.0, "reward": 0.0, "reward_std": 0.8067634701728821, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/std": 0.0, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.15389991938004774, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1106.375, "completions/mean_terminated_length": 1106.375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.4736947389477896, "frac_reward_zero_std": 0.0, "grad_norm": 3.6660964854546134, "kl": 0.01885986328125, "learning_rate": 6.687686350955599e-07, "loss": 0.0187, "num_tokens": 103266870.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9664785861968994, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020730264126679734, "rewards/wordcountpos_reward/raw_geo/std": 0.04761352178787761, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 880.0625, "completions/mean_terminated_length": 880.0625, "completions/min_length": 629.0, "completions/min_terminated_length": 629.0, "epoch": 0.4738947789557912, "frac_reward_zero_std": 0.0, "grad_norm": 3.3550177774260455, "kl": 0.020904541015625, "learning_rate": 6.684655190823672e-07, "loss": -0.0004, "num_tokens": 103293303.0, "reward": 7.450580596923828e-09, "reward_std": 1.0330798625946045, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.050705511183213224, "rewards/wordcountpos_reward/raw_geo/std": 0.04664756361075897, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0749073501808141, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1076.75, "completions/mean_terminated_length": 1076.75, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.47409481896379274, "frac_reward_zero_std": 0.0, "grad_norm": 3.33473154922755, "kl": 0.0178070068359375, "learning_rate": 6.681623453048496e-07, "loss": -0.0337, "num_tokens": 103335451.0, "reward": 0.0, "reward_std": 0.8983601927757263, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17327223438572573, "rewards/wordcountpos_reward/raw_geo/std": 0.07368084776230355, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820636, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1151.3125, "completions/mean_terminated_length": 1151.3125, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.47429485897179435, "frac_reward_zero_std": 0.0, "grad_norm": 3.3122870726017912, "kl": 0.0186767578125, "learning_rate": 6.678591139108357e-07, "loss": 0.0061, "num_tokens": 103387160.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7397730350494385, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.035280956362353996, "rewards/wordcountpos_reward/raw_geo/std": 0.12518194493379958, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1190.0625, "completions/mean_terminated_length": 1145.7857666015625, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.47449489897979596, "frac_reward_zero_std": 0.0, "grad_norm": 3.3693886126784585, "kl": 0.019744873046875, "learning_rate": 6.675558250481828e-07, "loss": -0.024, "num_tokens": 103433905.0, "reward": 0.0, "reward_std": 0.8514114022254944, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05859921526906615, "rewards/wordcountpos_reward/raw_geo/std": 0.0929313472910356, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408157, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 936.6875, "completions/mean_terminated_length": 936.6875, "completions/min_length": 583.0, "completions/min_terminated_length": 583.0, "epoch": 0.4746949389877976, "frac_reward_zero_std": 0.0, "grad_norm": 3.608806505901233, "kl": 0.02838134765625, "learning_rate": 6.672524788647758e-07, "loss": -0.0389, "num_tokens": 103461188.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9656420946121216, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.024649375224498806, "rewards/wordcountpos_reward/raw_geo/std": 0.07709630539968917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1385.75, "completions/mean_terminated_length": 1271.5, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.47489497899579913, "frac_reward_zero_std": 0.0, "grad_norm": 2.8443573688367643, "kl": 0.017669677734375, "learning_rate": 6.669490755085275e-07, "loss": 0.0226, "num_tokens": 103516464.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6739264726638794, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004650614537603541, "rewards/wordcountpos_reward/raw_geo/std": 0.10298612300939189, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 1215.1875, "completions/mean_terminated_length": 993.6666870117188, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.47509501900380074, "frac_reward_zero_std": 0.0, "grad_norm": 2.601507844179377, "kl": 0.0125885009765625, "learning_rate": 6.666456151273791e-07, "loss": -0.0144, "num_tokens": 103561683.0, "reward": 0.0, "reward_std": 0.6369954347610474, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06822691055540794, "rewards/wordcountpos_reward/raw_geo/std": 0.21829337277946845, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1026.0, "completions/max_terminated_length": 1026.0, "completions/mean_length": 829.0625, "completions/mean_terminated_length": 829.0625, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.47529505901180236, "frac_reward_zero_std": 0.0, "grad_norm": 3.422220404766956, "kl": 0.0192413330078125, "learning_rate": 6.663420978692991e-07, "loss": 0.0144, "num_tokens": 103594612.0, "reward": 0.0, "reward_std": 0.5488989353179932, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07504189117828258, "rewards/wordcountpos_reward/raw_geo/std": 0.09380474446693218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 985.1875, "completions/mean_terminated_length": 985.1875, "completions/min_length": 565.0, "completions/min_terminated_length": 565.0, "epoch": 0.47549509901980397, "frac_reward_zero_std": 0.0, "grad_norm": 3.8400066592119075, "kl": 0.02569580078125, "learning_rate": 6.660385238822837e-07, "loss": -0.0563, "num_tokens": 103633479.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0582481622695923, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07896159957114934, "rewards/wordcountpos_reward/raw_geo/std": 0.137670536282808, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1187.25, "completions/mean_terminated_length": 1187.25, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.4756951390278056, "frac_reward_zero_std": 0.0, "grad_norm": 2.2394248776656576, "kl": 0.0131378173828125, "learning_rate": 6.657348933143573e-07, "loss": -0.0001, "num_tokens": 103666451.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9347466230392456, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008773987658108807, "rewards/wordcountpos_reward/raw_geo/std": 0.035140555171118956, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1241.1875, "completions/mean_terminated_length": 1181.4615478515625, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.47589517903580714, "frac_reward_zero_std": 0.0, "grad_norm": 2.8161637165787443, "kl": 0.0140380859375, "learning_rate": 6.654312063135714e-07, "loss": -0.0257, "num_tokens": 103718878.0, "reward": 0.0, "reward_std": 0.7659417986869812, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09611307825457348, "rewards/wordcountpos_reward/raw_geo/std": 0.15653169842520587, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1286.1875, "completions/mean_terminated_length": 1271.933349609375, "completions/min_length": 1045.0, "completions/min_terminated_length": 1045.0, "epoch": 0.47609521904380875, "frac_reward_zero_std": 0.0, "grad_norm": 3.2769859125600536, "kl": 0.01885986328125, "learning_rate": 6.651274630280049e-07, "loss": -0.0412, "num_tokens": 103762689.0, "reward": 0.0, "reward_std": 1.0512163639068604, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06331198707025434, "rewards/wordcountpos_reward/raw_geo/std": 0.10556881455040562, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1156.625, "completions/mean_terminated_length": 1133.7333984375, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.47629525905181036, "frac_reward_zero_std": 0.0, "grad_norm": 3.4864812150397153, "kl": 0.019287109375, "learning_rate": 6.648236636057648e-07, "loss": -0.0402, "num_tokens": 103812011.0, "reward": 0.0, "reward_std": 0.8104844093322754, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1297677008385426, "rewards/wordcountpos_reward/raw_geo/std": 0.142749033664274, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1083.125, "completions/mean_terminated_length": 1055.3333740234375, "completions/min_length": 748.0, "completions/min_terminated_length": 748.0, "epoch": 0.47649529905981197, "frac_reward_zero_std": 0.0, "grad_norm": 3.590034391171242, "kl": 0.018310546875, "learning_rate": 6.64519808194985e-07, "loss": -0.0171, "num_tokens": 103863597.0, "reward": 0.0, "reward_std": 0.6687759160995483, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1830262752756874, "rewards/wordcountpos_reward/raw_geo/std": 0.17883512580279834, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1221.25, "completions/mean_terminated_length": 1202.666748046875, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.4766953390678136, "frac_reward_zero_std": 0.0, "grad_norm": 3.102051774503689, "kl": 0.0175628662109375, "learning_rate": 6.642158969438267e-07, "loss": 0.0071, "num_tokens": 103916377.0, "reward": 5.960464477539063e-08, "reward_std": 0.668880820274353, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04257431389100724, "rewards/wordcountpos_reward/raw_geo/std": 0.19919114528536405, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1279.3125, "completions/mean_terminated_length": 1205.75, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.47689537907581514, "frac_reward_zero_std": 0.0, "grad_norm": 3.1159118620621498, "kl": 0.01556396484375, "learning_rate": 6.639119300004783e-07, "loss": -0.0177, "num_tokens": 103961542.0, "reward": 0.0, "reward_std": 1.0123437643051147, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19394995035111867, "rewards/wordcountpos_reward/raw_geo/std": 0.07954011274908436, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1120.875, "completions/mean_terminated_length": 1120.875, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.47709541908381675, "frac_reward_zero_std": 0.0, "grad_norm": 3.351437593581227, "kl": 0.0183563232421875, "learning_rate": 6.636079075131555e-07, "loss": -0.03, "num_tokens": 104015756.0, "reward": 0.0, "reward_std": 1.0425397157669067, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1684281743348131, "rewards/wordcountpos_reward/raw_geo/std": 0.11972479071462068, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408157, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1275.3125, "completions/mean_terminated_length": 1243.21435546875, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.47729545909181836, "frac_reward_zero_std": 0.0, "grad_norm": 2.618215296004809, "kl": 0.0143890380859375, "learning_rate": 6.633038296301013e-07, "loss": 0.0027, "num_tokens": 104054201.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9786999225616455, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.033427585607277176, "rewards/wordcountpos_reward/raw_geo/std": 0.08864316523016143, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1010.375, "completions/mean_terminated_length": 1010.375, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.47749549909982, "frac_reward_zero_std": 0.0, "grad_norm": 2.8104381786667063, "kl": 0.0154266357421875, "learning_rate": 6.629996964995851e-07, "loss": -0.0027, "num_tokens": 104094327.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9126765727996826, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24585682601079964, "rewards/wordcountpos_reward/raw_geo/std": 0.22704162953522822, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1253.8125, "completions/mean_terminated_length": 1141.9091796875, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.4776955391078216, "frac_reward_zero_std": 0.0, "grad_norm": 2.743611074830518, "kl": 0.01275634765625, "learning_rate": 6.62695508269904e-07, "loss": 0.0021, "num_tokens": 104136820.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9293147325515747, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3749292557883603, "rewards/wordcountpos_reward/raw_geo/std": 0.26941358663723763, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 1151.6875, "completions/mean_terminated_length": 1128.4666748046875, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.47789557911582314, "frac_reward_zero_std": 0.0, "grad_norm": 3.4451073863350183, "kl": 0.02117919921875, "learning_rate": 6.623912650893811e-07, "loss": -0.0352, "num_tokens": 104179055.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9473552703857422, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007664648852421313, "rewards/wordcountpos_reward/raw_geo/std": 0.04416876997961496, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1192.75, "completions/mean_terminated_length": 1192.75, "completions/min_length": 1044.0, "completions/min_terminated_length": 1044.0, "epoch": 0.47809561912382476, "frac_reward_zero_std": 0.0, "grad_norm": 3.2063257311389517, "kl": 0.0224609375, "learning_rate": 6.620869671063671e-07, "loss": -0.0091, "num_tokens": 104223067.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0504685640335083, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0634642523864687, "rewards/wordcountpos_reward/raw_geo/std": 0.0641957351211786, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952499, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1085.0, "completions/max_terminated_length": 1085.0, "completions/mean_length": 893.0625, "completions/mean_terminated_length": 893.0625, "completions/min_length": 704.0, "completions/min_terminated_length": 704.0, "epoch": 0.47829565913182637, "frac_reward_zero_std": 0.0, "grad_norm": 3.245788056926649, "kl": 0.01422119140625, "learning_rate": 6.61782614469239e-07, "loss": -0.0364, "num_tokens": 104264052.0, "reward": 7.450580596923828e-09, "reward_std": 1.030979871749878, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10520258841835653, "rewards/wordcountpos_reward/raw_geo/std": 0.06860093031516436, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.107496769977314, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1107.5, "completions/mean_terminated_length": 1107.5, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.478495699139828, "frac_reward_zero_std": 0.0, "grad_norm": 2.711802946385116, "kl": 0.015838623046875, "learning_rate": 6.614782073264004e-07, "loss": -0.0156, "num_tokens": 104303428.0, "reward": 0.0, "reward_std": 0.9995061159133911, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.024808351411425105, "rewards/wordcountpos_reward/raw_geo/std": 0.11221063564859712, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1227.0, "completions/mean_terminated_length": 1188.0, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.4786957391478296, "frac_reward_zero_std": 0.0, "grad_norm": 2.802299374852408, "kl": 0.0144805908203125, "learning_rate": 6.611737458262817e-07, "loss": 0.003, "num_tokens": 104351316.0, "reward": 0.0, "reward_std": 0.7322205305099487, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03747067048520329, "rewards/wordcountpos_reward/raw_geo/std": 0.05314188871895981, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1227.0, "completions/max_terminated_length": 1227.0, "completions/mean_length": 1075.4375, "completions/mean_terminated_length": 1075.4375, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.47889577915583115, "frac_reward_zero_std": 0.0, "grad_norm": 2.944893043333961, "kl": 0.0124664306640625, "learning_rate": 6.608692301173397e-07, "loss": -0.0427, "num_tokens": 104396035.0, "reward": -5.960464477539063e-08, "reward_std": 1.0186601877212524, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015261303121534373, "rewards/wordcountpos_reward/raw_geo/std": 0.1531904663896025, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1159.125, "completions/mean_terminated_length": 1159.125, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.47909581916383276, "frac_reward_zero_std": 0.0, "grad_norm": 2.650016868427002, "kl": 0.014129638671875, "learning_rate": 6.605646603480577e-07, "loss": -0.0014, "num_tokens": 104438021.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9938091039657593, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05971073059862828, "rewards/wordcountpos_reward/raw_geo/std": 0.11952056971971642, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1204.0, "completions/mean_terminated_length": 1105.3333740234375, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.47929585917183437, "frac_reward_zero_std": 0.0, "grad_norm": 3.1352180429074727, "kl": 0.023895263671875, "learning_rate": 6.602600366669452e-07, "loss": -0.0006, "num_tokens": 104488621.0, "reward": -2.9802322387695312e-08, "reward_std": 1.013277292251587, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.037870170417683355, "rewards/wordcountpos_reward/raw_geo/std": 0.1328349234801076, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1118.0, "completions/max_terminated_length": 1118.0, "completions/mean_length": 882.8125, "completions/mean_terminated_length": 882.8125, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.479495899179836, "frac_reward_zero_std": 0.0, "grad_norm": 3.253851043623395, "kl": 0.012119293212890625, "learning_rate": 6.599553592225382e-07, "loss": -0.0334, "num_tokens": 104521426.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0050116777420044, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2107090181552933, "rewards/wordcountpos_reward/raw_geo/std": 0.05957334137319125, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1322.25, "completions/mean_terminated_length": 1241.45458984375, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.4796959391878376, "frac_reward_zero_std": 0.0, "grad_norm": 2.820436390066583, "kl": 0.01556396484375, "learning_rate": 6.596506281633985e-07, "loss": 0.0053, "num_tokens": 104569366.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8926297426223755, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05304211940990448, "rewards/wordcountpos_reward/raw_geo/std": 0.07592987130176979, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1131.9375, "completions/mean_terminated_length": 1131.9375, "completions/min_length": 997.0, "completions/min_terminated_length": 997.0, "epoch": 0.47989597919583915, "frac_reward_zero_std": 0.0, "grad_norm": 3.0435637853769, "kl": 0.015289306640625, "learning_rate": 6.59345843638115e-07, "loss": 0.0031, "num_tokens": 104618605.0, "reward": 0.0, "reward_std": 1.0078725814819336, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13910469811677256, "rewards/wordcountpos_reward/raw_geo/std": 0.15070792960448812, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1333.0, "completions/mean_length": 1250.0, "completions/mean_terminated_length": 1214.2857666015625, "completions/min_length": 1056.0, "completions/min_terminated_length": 1056.0, "epoch": 0.48009601920384076, "frac_reward_zero_std": 0.0, "grad_norm": 2.937279624159831, "kl": 0.016815185546875, "learning_rate": 6.590410057953013e-07, "loss": 0.0008, "num_tokens": 104669101.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0306488275527954, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13269792913867548, "rewards/wordcountpos_reward/raw_geo/std": 0.08894500521554398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952499, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1199.4375, "completions/mean_terminated_length": 1179.4000244140625, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.4802960592118424, "frac_reward_zero_std": 0.0, "grad_norm": 2.5692929689655823, "kl": 0.0116119384765625, "learning_rate": 6.587361147835983e-07, "loss": 0.026, "num_tokens": 104712460.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0406734943389893, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05449329397228378, "rewards/wordcountpos_reward/raw_geo/std": 0.15433562810430823, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1195.625, "completions/mean_terminated_length": 1057.272705078125, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.480496099219844, "frac_reward_zero_std": 0.0, "grad_norm": 3.0561556983294076, "kl": 0.02081298828125, "learning_rate": 6.584311707516717e-07, "loss": 0.0366, "num_tokens": 104765558.0, "reward": -1.862645149230957e-08, "reward_std": 1.0525107383728027, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013548267449069784, "rewards/wordcountpos_reward/raw_geo/std": 0.05890973276511855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1213.0, "completions/mean_terminated_length": 1213.0, "completions/min_length": 1034.0, "completions/min_terminated_length": 1034.0, "epoch": 0.48069613922784554, "frac_reward_zero_std": 0.0, "grad_norm": 3.071065603731003, "kl": 0.017791748046875, "learning_rate": 6.581261738482139e-07, "loss": -0.0031, "num_tokens": 104811054.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9342724084854126, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054286435529555185, "rewards/wordcountpos_reward/raw_geo/std": 0.07280902202390066, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1233.1875, "completions/mean_terminated_length": 1233.1875, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.48089617923584715, "frac_reward_zero_std": 0.0, "grad_norm": 2.926771075281777, "kl": 0.01483154296875, "learning_rate": 6.578211242219429e-07, "loss": 0.0176, "num_tokens": 104859081.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0183165073394775, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16215055264199274, "rewards/wordcountpos_reward/raw_geo/std": 0.20950154459840517, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1153.5625, "completions/mean_terminated_length": 1130.4666748046875, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.48109621924384877, "frac_reward_zero_std": 0.0, "grad_norm": 2.84236624000135, "kl": 0.019073486328125, "learning_rate": 6.57516022021602e-07, "loss": 0.005, "num_tokens": 104897114.0, "reward": 0.0, "reward_std": 0.9654723405838013, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0016453783128025585, "rewards/wordcountpos_reward/raw_geo/std": 0.049815233728315075, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1086.875, "completions/mean_terminated_length": 1086.875, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.4812962592518504, "frac_reward_zero_std": 0.0, "grad_norm": 3.234044055963462, "kl": 0.0142974853515625, "learning_rate": 6.572108673959608e-07, "loss": 0.0639, "num_tokens": 104933792.0, "reward": 0.0, "reward_std": 0.8987914323806763, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.012984110245590161, "rewards/wordcountpos_reward/raw_geo/std": 0.10783372485020258, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1379.875, "completions/mean_terminated_length": 1286.4444580078125, "completions/min_length": 1138.0, "completions/min_terminated_length": 1138.0, "epoch": 0.481496299259852, "frac_reward_zero_std": 0.0, "grad_norm": 2.610702226810812, "kl": 0.0161895751953125, "learning_rate": 6.569056604938134e-07, "loss": 0.0057, "num_tokens": 104984726.0, "reward": 0.0, "reward_std": 0.8522013425827026, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.039195952905243574, "rewards/wordcountpos_reward/raw_geo/std": 0.3331455756833289, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717428, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1244.0625, "completions/mean_terminated_length": 1207.5, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.48169633926785355, "frac_reward_zero_std": 0.0, "grad_norm": 3.520409047708401, "kl": 0.021331787109375, "learning_rate": 6.566004014639809e-07, "loss": -0.0407, "num_tokens": 105029655.0, "reward": 1.4901161193847656e-08, "reward_std": 0.948483943939209, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08321720087327493, "rewards/wordcountpos_reward/raw_geo/std": 0.15071904967304245, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1163.5625, "completions/mean_terminated_length": 1115.5, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.48189637927585516, "frac_reward_zero_std": 0.0, "grad_norm": 3.570852807169432, "kl": 0.0213623046875, "learning_rate": 6.562950904553082e-07, "loss": 0.0198, "num_tokens": 105075856.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0470486879348755, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1221104991156008, "rewards/wordcountpos_reward/raw_geo/std": 0.21048414726359213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1161.0, "completions/max_terminated_length": 1161.0, "completions/mean_length": 979.0, "completions/mean_terminated_length": 979.0, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.48209641928385677, "frac_reward_zero_std": 0.0, "grad_norm": 3.4607149396717225, "kl": 0.0182342529296875, "learning_rate": 6.55989727616667e-07, "loss": -0.0004, "num_tokens": 105103208.0, "reward": 5.960464477539063e-08, "reward_std": 0.6855415105819702, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11295622868804271, "rewards/wordcountpos_reward/raw_geo/std": 0.06460928968905294, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1123.9375, "completions/mean_terminated_length": 1070.21435546875, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.4822964592918584, "frac_reward_zero_std": 0.0, "grad_norm": 3.610439033992955, "kl": 0.017791748046875, "learning_rate": 6.556843130969533e-07, "loss": -0.0229, "num_tokens": 105147039.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9519141912460327, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.022152915127902878, "rewards/wordcountpos_reward/raw_geo/std": 0.08539858957252494, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 987.3125, "completions/mean_terminated_length": 987.3125, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.48249649929986, "frac_reward_zero_std": 0.0, "grad_norm": 4.102209330933632, "kl": 0.02569580078125, "learning_rate": 6.553788470450883e-07, "loss": -0.0183, "num_tokens": 105196812.0, "reward": 0.0, "reward_std": 0.7178373336791992, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10425410829034103, "rewards/wordcountpos_reward/raw_geo/std": 0.1249504225146541, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1188.3125, "completions/mean_terminated_length": 1188.3125, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.48269653930786155, "frac_reward_zero_std": 0.0, "grad_norm": 3.5050408766881085, "kl": 0.021636962890625, "learning_rate": 6.550733296100195e-07, "loss": -0.0321, "num_tokens": 105240153.0, "reward": 1.4901161193847656e-08, "reward_std": 0.939227819442749, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02795408630983629, "rewards/wordcountpos_reward/raw_geo/std": 0.10828209079226554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 917.0625, "completions/mean_terminated_length": 917.0625, "completions/min_length": 661.0, "completions/min_terminated_length": 661.0, "epoch": 0.48289657931586316, "frac_reward_zero_std": 0.0, "grad_norm": 3.1322482564300893, "kl": 0.0169525146484375, "learning_rate": 6.547677609407178e-07, "loss": 0.0067, "num_tokens": 105268274.0, "reward": 0.0, "reward_std": 0.5556799173355103, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061962209923825104, "rewards/wordcountpos_reward/raw_geo/std": 0.05892696462054709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1274.0625, "completions/mean_terminated_length": 1221.923095703125, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.4830966193238648, "frac_reward_zero_std": 0.0, "grad_norm": 2.5906919080555952, "kl": 0.014129638671875, "learning_rate": 6.5446214118618e-07, "loss": 0.0067, "num_tokens": 105314107.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7129099369049072, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19995896886942768, "rewards/wordcountpos_reward/raw_geo/std": 0.2075856931161145, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902597, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1237.0625, "completions/mean_terminated_length": 1117.5455322265625, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.4832966593318664, "frac_reward_zero_std": 0.0, "grad_norm": 3.0072428467660215, "kl": 0.01812744140625, "learning_rate": 6.541564704954279e-07, "loss": -0.0134, "num_tokens": 105365748.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7843153476715088, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03668268300567697, "rewards/wordcountpos_reward/raw_geo/std": 0.25776672848815657, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1132.25, "completions/mean_terminated_length": 1107.7333984375, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.483496699339868, "frac_reward_zero_std": 0.0, "grad_norm": 3.7774953549715327, "kl": 0.022003173828125, "learning_rate": 6.538507490175079e-07, "loss": -0.0219, "num_tokens": 105407624.0, "reward": 7.450580596923828e-09, "reward_std": 1.0253766775131226, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.0281388168338768, "rewards/wordcountpos_reward/raw_geo/std": 0.06599753256671449, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1166.0, "completions/max_terminated_length": 1166.0, "completions/mean_length": 932.875, "completions/mean_terminated_length": 932.875, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.48369673934786955, "frac_reward_zero_std": 0.0, "grad_norm": 2.6883360933522287, "kl": 0.0141448974609375, "learning_rate": 6.535449769014909e-07, "loss": -0.0039, "num_tokens": 105439686.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9688026905059814, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03407906358194204, "rewards/wordcountpos_reward/raw_geo/std": 0.08528151539662386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 949.125, "completions/mean_terminated_length": 949.125, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.48389677935587116, "frac_reward_zero_std": 0.0, "grad_norm": 3.033955710071834, "kl": 0.021759033203125, "learning_rate": 6.532391542964733e-07, "loss": 0.0345, "num_tokens": 105481936.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0115822553634644, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.059313890055068576, "rewards/wordcountpos_reward/raw_geo/std": 0.0747079278897546, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1228.3125, "completions/mean_terminated_length": 1017.0, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.4840968193638728, "frac_reward_zero_std": 0.0, "grad_norm": 3.0970212798537182, "kl": 0.020660400390625, "learning_rate": 6.52933281351575e-07, "loss": 0.0203, "num_tokens": 105535181.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5281584858894348, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14405130149505943, "rewards/wordcountpos_reward/raw_geo/std": 0.1741559710884911, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.107496769977314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1043.1875, "completions/mean_terminated_length": 1043.1875, "completions/min_length": 769.0, "completions/min_terminated_length": 769.0, "epoch": 0.4842968593718744, "frac_reward_zero_std": 0.0, "grad_norm": 3.5983285713024933, "kl": 0.03094482421875, "learning_rate": 6.526273582159413e-07, "loss": 0.031, "num_tokens": 105571504.0, "reward": 3.725290298461914e-09, "reward_std": 1.0593198537826538, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.18304294163537294, "rewards/wordcountpos_reward/raw_geo/std": 0.10126730232859225, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1206.75, "completions/mean_terminated_length": 1164.857177734375, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.484496899379876, "frac_reward_zero_std": 0.0, "grad_norm": 2.9220024494614547, "kl": 0.0150909423828125, "learning_rate": 6.523213850387419e-07, "loss": -0.0074, "num_tokens": 105616300.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9897497892379761, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0016285996369836608, "rewards/wordcountpos_reward/raw_geo/std": 0.09261669194303898, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1218.625, "completions/mean_terminated_length": 1218.625, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.48469693938787756, "frac_reward_zero_std": 0.0, "grad_norm": 3.1888988063740222, "kl": 0.0182647705078125, "learning_rate": 6.520153619691704e-07, "loss": 0.0167, "num_tokens": 105662766.0, "reward": 0.0, "reward_std": 0.5687963366508484, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06496439219755465, "rewards/wordcountpos_reward/raw_geo/std": 0.07136138376007878, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1119.875, "completions/mean_terminated_length": 1094.533447265625, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.48489697939587917, "frac_reward_zero_std": 0.0, "grad_norm": 3.1885433269631527, "kl": 0.0170135498046875, "learning_rate": 6.517092891564452e-07, "loss": 0.0012, "num_tokens": 105706868.0, "reward": -7.450580596923828e-09, "reward_std": 1.06515371799469, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.04710292918895006, "rewards/wordcountpos_reward/raw_geo/std": 0.21473504841726873, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1158.5625, "completions/mean_terminated_length": 1158.5625, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.4850970194038808, "frac_reward_zero_std": 0.0, "grad_norm": 2.8979908491767503, "kl": 0.013397216796875, "learning_rate": 6.514031667498087e-07, "loss": -0.0056, "num_tokens": 105738965.0, "reward": 0.0, "reward_std": 0.40224289894104004, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10965490497181314, "rewards/wordcountpos_reward/raw_geo/std": 0.132447442956899, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1314.1875, "completions/mean_terminated_length": 1229.727294921875, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.4852970594118824, "frac_reward_zero_std": 0.0, "grad_norm": 2.2680847855858794, "kl": 0.0145263671875, "learning_rate": 6.510969948985278e-07, "loss": 0.0309, "num_tokens": 105792592.0, "reward": 1.4901161193847656e-08, "reward_std": 0.946617841720581, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09409271246811102, "rewards/wordcountpos_reward/raw_geo/std": 0.044621771544057144, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.14950535726806533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1426.8125, "completions/mean_terminated_length": 1207.25, "completions/min_length": 1120.0, "completions/min_terminated_length": 1120.0, "epoch": 0.485497099419884, "frac_reward_zero_std": 0.0, "grad_norm": 2.731954253231881, "kl": 0.0143585205078125, "learning_rate": 6.507907737518929e-07, "loss": -0.0049, "num_tokens": 105845789.0, "reward": 9.313225746154785e-09, "reward_std": 1.0681819915771484, "rewards/wordcountpos_reward/mean": 9.313225746154785e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.11236595087080822, "rewards/wordcountpos_reward/raw_geo/std": 0.10771188555372761, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10671873729054747, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 797.25, "completions/mean_terminated_length": 797.25, "completions/min_length": 624.0, "completions/min_terminated_length": 624.0, "epoch": 0.48569713942788556, "frac_reward_zero_std": 0.0, "grad_norm": 3.8831902070948416, "kl": 0.0216064453125, "learning_rate": 6.504845034592192e-07, "loss": 0.029, "num_tokens": 105886641.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0196657180786133, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.046225522547264364, "rewards/wordcountpos_reward/raw_geo/std": 0.13695924687956415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1071.1875, "completions/mean_terminated_length": 1071.1875, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.48589717943588717, "frac_reward_zero_std": 0.0, "grad_norm": 3.135487855113703, "kl": 0.0165863037109375, "learning_rate": 6.501781841698452e-07, "loss": -0.0687, "num_tokens": 105938012.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0565840005874634, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07050484937769752, "rewards/wordcountpos_reward/raw_geo/std": 0.16495784259140406, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1206.0, "completions/mean_terminated_length": 1138.1539306640625, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.4860972194438888, "frac_reward_zero_std": 0.0, "grad_norm": 3.088974739211936, "kl": 0.0148162841796875, "learning_rate": 6.498718160331337e-07, "loss": -0.0115, "num_tokens": 105994012.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0428420305252075, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.058403719880075304, "rewards/wordcountpos_reward/raw_geo/std": 0.16873666153637606, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1312.6875, "completions/mean_terminated_length": 1227.5455322265625, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.4862972594518904, "frac_reward_zero_std": 0.0, "grad_norm": 2.981761586412861, "kl": 0.014068603515625, "learning_rate": 6.495653991984713e-07, "loss": 0.0369, "num_tokens": 106045439.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7621040344238281, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07369696364234153, "rewards/wordcountpos_reward/raw_geo/std": 0.31351939198954676, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1023.875, "completions/mean_terminated_length": 1023.875, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.48649729945989195, "frac_reward_zero_std": 0.0, "grad_norm": 2.7728051388814836, "kl": 0.012664794921875, "learning_rate": 6.492589338152681e-07, "loss": -0.0306, "num_tokens": 106083045.0, "reward": 0.0, "reward_std": 0.7614666223526001, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04971697880007704, "rewards/wordcountpos_reward/raw_geo/std": 0.1049702939083125, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1268.6875, "completions/mean_terminated_length": 1235.6429443359375, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.48669733946789356, "frac_reward_zero_std": 0.0, "grad_norm": 1.9585844009963556, "kl": 0.00949859619140625, "learning_rate": 6.48952420032958e-07, "loss": 0.0436, "num_tokens": 106132048.0, "reward": 0.0, "reward_std": 0.643509030342102, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07585083197050857, "rewards/wordcountpos_reward/raw_geo/std": 0.1923218222402635, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1221.6875, "completions/mean_terminated_length": 1203.1334228515625, "completions/min_length": 1025.0, "completions/min_terminated_length": 1025.0, "epoch": 0.4868973794758952, "frac_reward_zero_std": 0.0, "grad_norm": 2.4924314668237812, "kl": 0.009429931640625, "learning_rate": 6.486458580009986e-07, "loss": 0.0148, "num_tokens": 106178739.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9568771123886108, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.003205930569729381, "rewards/wordcountpos_reward/raw_geo/std": 0.08370814915538545, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1007.1875, "completions/mean_terminated_length": 1007.1875, "completions/min_length": 669.0, "completions/min_terminated_length": 669.0, "epoch": 0.4870974194838968, "frac_reward_zero_std": 0.0, "grad_norm": 2.6077183922252027, "kl": 0.01104736328125, "learning_rate": 6.483392478688708e-07, "loss": -0.0172, "num_tokens": 106213166.0, "reward": 7.450580596923828e-09, "reward_std": 1.0351207256317139, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16490792461653525, "rewards/wordcountpos_reward/raw_geo/std": 0.06988387986989092, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1151.0, "completions/max_terminated_length": 1151.0, "completions/mean_length": 1058.5, "completions/mean_terminated_length": 1058.5, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.4872974594918984, "frac_reward_zero_std": 0.0, "grad_norm": 3.0191196144857084, "kl": 0.0149993896484375, "learning_rate": 6.480325897860793e-07, "loss": -0.0198, "num_tokens": 106255118.0, "reward": 0.0, "reward_std": 0.9173927903175354, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1302639194866078, "rewards/wordcountpos_reward/raw_geo/std": 0.29384027248853883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1186.6875, "completions/mean_terminated_length": 1186.6875, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.48749749949989996, "frac_reward_zero_std": 0.0, "grad_norm": 2.0115945229037657, "kl": 0.011993408203125, "learning_rate": 6.477258839021519e-07, "loss": -0.0246, "num_tokens": 106310169.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0334620475769043, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02276956805676178, "rewards/wordcountpos_reward/raw_geo/std": 0.13449569928127822, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1235.0, "completions/mean_terminated_length": 1173.84619140625, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.48769753950790157, "frac_reward_zero_std": 0.0, "grad_norm": 2.623381280111791, "kl": 0.018646240234375, "learning_rate": 6.474191303666396e-07, "loss": -0.0434, "num_tokens": 106363345.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9885764122009277, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18897554711314038, "rewards/wordcountpos_reward/raw_geo/std": 0.2367928864604072, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1280.125, "completions/mean_terminated_length": 1265.4666748046875, "completions/min_length": 1003.0, "completions/min_terminated_length": 1003.0, "epoch": 0.4878975795159032, "frac_reward_zero_std": 0.0, "grad_norm": 2.079696260156718, "kl": 0.01059722900390625, "learning_rate": 6.47112329329117e-07, "loss": -0.0212, "num_tokens": 106414315.0, "reward": 2.9802322387695312e-08, "reward_std": 1.049928903579712, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0376691783836376, "rewards/wordcountpos_reward/raw_geo/std": 0.04377180130333968, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1146.625, "completions/mean_terminated_length": 1123.0667724609375, "completions/min_length": 615.0, "completions/min_terminated_length": 615.0, "epoch": 0.4880976195239048, "frac_reward_zero_std": 0.0, "grad_norm": 3.547359561589532, "kl": 0.016815185546875, "learning_rate": 6.468054809391817e-07, "loss": 0.0104, "num_tokens": 106461389.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9320579767227173, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.021896102736248067, "rewards/wordcountpos_reward/raw_geo/std": 0.06906483505006317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 1134.0, "completions/mean_terminated_length": 1134.0, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.4882976595319064, "frac_reward_zero_std": 0.0, "grad_norm": 3.102009729455164, "kl": 0.0162200927734375, "learning_rate": 6.464985853464543e-07, "loss": 0.0303, "num_tokens": 106499621.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4008268713951111, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18039388512291893, "rewards/wordcountpos_reward/raw_geo/std": 0.125024480428464, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1163.5625, "completions/mean_terminated_length": 1163.5625, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.48849769953990796, "frac_reward_zero_std": 0.0, "grad_norm": 2.6792763897641976, "kl": 0.012054443359375, "learning_rate": 6.461916427005784e-07, "loss": -0.01, "num_tokens": 106543158.0, "reward": 0.0, "reward_std": 0.9052522778511047, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.031343334440430796, "rewards/wordcountpos_reward/raw_geo/std": 0.04903408547401311, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1258.0, "completions/max_terminated_length": 1258.0, "completions/mean_length": 1097.3125, "completions/mean_terminated_length": 1097.3125, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.48869773954790957, "frac_reward_zero_std": 0.0, "grad_norm": 3.604595737239155, "kl": 0.02203369140625, "learning_rate": 6.458846531512208e-07, "loss": 0.0059, "num_tokens": 106591291.0, "reward": 0.0, "reward_std": 0.9923151731491089, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.048963121024728724, "rewards/wordcountpos_reward/raw_geo/std": 0.10501578637280023, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1100.3125, "completions/mean_terminated_length": 1100.3125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.4888977795559112, "frac_reward_zero_std": 0.0, "grad_norm": 3.5406997485643488, "kl": 0.019439697265625, "learning_rate": 6.45577616848071e-07, "loss": -0.0559, "num_tokens": 106641328.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9143186807632446, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01468818361183517, "rewards/wordcountpos_reward/raw_geo/std": 0.08227962389508069, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1208.6875, "completions/mean_terminated_length": 1111.5833740234375, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.4890978195639128, "frac_reward_zero_std": 0.0, "grad_norm": 3.3569702625200226, "kl": 0.021514892578125, "learning_rate": 6.452705339408411e-07, "loss": -0.0179, "num_tokens": 106693179.0, "reward": -2.9802322387695312e-08, "reward_std": 0.810326337814331, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22738422611221867, "rewards/wordcountpos_reward/raw_geo/std": 0.10338868404320666, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10610965676722953, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1001.0, "completions/max_terminated_length": 1001.0, "completions/mean_length": 797.625, "completions/mean_terminated_length": 797.625, "completions/min_length": 590.0, "completions/min_terminated_length": 590.0, "epoch": 0.4892978595719144, "frac_reward_zero_std": 0.0, "grad_norm": 3.3672556471374584, "kl": 0.0126190185546875, "learning_rate": 6.449634045792663e-07, "loss": -0.0405, "num_tokens": 106723317.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0010125637054443, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01850046655838307, "rewards/wordcountpos_reward/raw_geo/std": 0.11904436608320305, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1045.5625, "completions/mean_terminated_length": 980.6428833007812, "completions/min_length": 704.0, "completions/min_terminated_length": 704.0, "epoch": 0.48949789957991596, "frac_reward_zero_std": 0.0, "grad_norm": 2.7417193675770815, "kl": 0.014923095703125, "learning_rate": 6.446562289131042e-07, "loss": -0.0907, "num_tokens": 106767726.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8040706515312195, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2538687757865821, "rewards/wordcountpos_reward/raw_geo/std": 0.17125138964846767, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1245.1875, "completions/mean_terminated_length": 1245.1875, "completions/min_length": 1123.0, "completions/min_terminated_length": 1123.0, "epoch": 0.4896979395879176, "frac_reward_zero_std": 0.0, "grad_norm": 2.982009109193991, "kl": 0.01739501953125, "learning_rate": 6.44349007092135e-07, "loss": -0.0085, "num_tokens": 106804977.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8897942304611206, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03598242655172404, "rewards/wordcountpos_reward/raw_geo/std": 0.12949152264456565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1375.9375, "completions/mean_terminated_length": 1319.5455322265625, "completions/min_length": 1048.0, "completions/min_terminated_length": 1048.0, "epoch": 0.4898979795959192, "frac_reward_zero_std": 0.0, "grad_norm": 1.6539166768338418, "kl": 0.00716400146484375, "learning_rate": 6.440417392661617e-07, "loss": 0.0151, "num_tokens": 106847024.0, "reward": 1.4901161193847656e-08, "reward_std": 0.7940922975540161, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09132984091475968, "rewards/wordcountpos_reward/raw_geo/std": 0.25863634354807463, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1351.5625, "completions/mean_terminated_length": 1302.0833740234375, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.4900980196039208, "frac_reward_zero_std": 0.0, "grad_norm": 2.8074638423381906, "kl": 0.0158233642578125, "learning_rate": 6.43734425585009e-07, "loss": -0.0261, "num_tokens": 106900777.0, "reward": 0.0, "reward_std": 0.5442103147506714, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07064479245845012, "rewards/wordcountpos_reward/raw_geo/std": 0.13767418526698297, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1167460047694551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1209.4375, "completions/mean_terminated_length": 1167.9285888671875, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.4902980596119224, "frac_reward_zero_std": 0.0, "grad_norm": 3.232114957985279, "kl": 0.0194091796875, "learning_rate": 6.434270661985249e-07, "loss": -0.0241, "num_tokens": 106949128.0, "reward": 0.0, "reward_std": 0.9234766364097595, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.19572616700346773, "rewards/wordcountpos_reward/raw_geo/std": 0.09100630221370339, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13871099718746432, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1208.0, "completions/max_terminated_length": 1208.0, "completions/mean_length": 1068.6875, "completions/mean_terminated_length": 1068.6875, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.49049809961992397, "frac_reward_zero_std": 0.0, "grad_norm": 2.7597452519299988, "kl": 0.012786865234375, "learning_rate": 6.431196612565791e-07, "loss": -0.0215, "num_tokens": 106989203.0, "reward": -2.9802322387695312e-08, "reward_std": 0.955418586730957, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.043860793224107374, "rewards/wordcountpos_reward/raw_geo/std": 0.09124156270539935, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1263.875, "completions/mean_terminated_length": 1263.875, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.4906981396279256, "frac_reward_zero_std": 0.0, "grad_norm": 3.100670111903379, "kl": 0.01849365234375, "learning_rate": 6.428122109090635e-07, "loss": 0.0013, "num_tokens": 107035089.0, "reward": 0.0, "reward_std": 0.5583494901657104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06464940019069264, "rewards/wordcountpos_reward/raw_geo/std": 0.20650570532554086, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1041.5625, "completions/mean_terminated_length": 1041.5625, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.4908981796359272, "frac_reward_zero_std": 0.0, "grad_norm": 3.1461045252052333, "kl": 0.013519287109375, "learning_rate": 6.425047153058925e-07, "loss": 0.0151, "num_tokens": 107074434.0, "reward": 0.0, "reward_std": 0.9113203287124634, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17876269005490555, "rewards/wordcountpos_reward/raw_geo/std": 0.2819566468648264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1129.1875, "completions/mean_terminated_length": 1129.1875, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.4910982196439288, "frac_reward_zero_std": 0.0, "grad_norm": 3.429929772446792, "kl": 0.015869140625, "learning_rate": 6.421971745970024e-07, "loss": -0.0475, "num_tokens": 107107773.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0493175983428955, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08101173118200361, "rewards/wordcountpos_reward/raw_geo/std": 0.047802346900445436, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1334.25, "completions/mean_terminated_length": 1258.9091796875, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.4912982596519304, "frac_reward_zero_std": 0.0, "grad_norm": 2.9778095050331674, "kl": 0.01873779296875, "learning_rate": 6.418895889323512e-07, "loss": -0.0187, "num_tokens": 107163233.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0336551666259766, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07623479107743375, "rewards/wordcountpos_reward/raw_geo/std": 0.10449474677900938, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1141.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 902.5625, "completions/mean_terminated_length": 902.5625, "completions/min_length": 591.0, "completions/min_terminated_length": 591.0, "epoch": 0.49149829965993197, "frac_reward_zero_std": 0.0, "grad_norm": 3.7559983700467425, "kl": 0.037139892578125, "learning_rate": 6.415819584619191e-07, "loss": 0.0061, "num_tokens": 107201930.0, "reward": 2.60770320892334e-08, "reward_std": 1.0603476762771606, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02517178659978418, "rewards/wordcountpos_reward/raw_geo/std": 0.21697390491307356, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1392.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1155.3125, "completions/mean_terminated_length": 1155.3125, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.4916983396679336, "frac_reward_zero_std": 0.0, "grad_norm": 2.9751553752875335, "kl": 0.0146484375, "learning_rate": 6.412742833357083e-07, "loss": -0.0093, "num_tokens": 107250479.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8687210083007812, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22161220433644185, "rewards/wordcountpos_reward/raw_geo/std": 0.31970927918759523, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1209.6875, "completions/mean_terminated_length": 1190.3333740234375, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.4918983796759352, "frac_reward_zero_std": 0.0, "grad_norm": 3.358877660785233, "kl": 0.017333984375, "learning_rate": 6.409665637037424e-07, "loss": -0.013, "num_tokens": 107302874.0, "reward": 0.0, "reward_std": 0.6936535239219666, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.043532523949597154, "rewards/wordcountpos_reward/raw_geo/std": 0.11037295566634076, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1146.1875, "completions/mean_terminated_length": 1122.60009765625, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.4920984196839368, "frac_reward_zero_std": 0.0, "grad_norm": 2.5584248328014425, "kl": 0.01074981689453125, "learning_rate": 6.406587997160669e-07, "loss": -0.0107, "num_tokens": 107351933.0, "reward": 1.4901161193847656e-08, "reward_std": 0.956708550453186, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06590199869165475, "rewards/wordcountpos_reward/raw_geo/std": 0.17280471852612195, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1071.8125, "completions/mean_terminated_length": 1071.8125, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.49229845969193836, "frac_reward_zero_std": 0.0, "grad_norm": 3.380259030179718, "kl": 0.0194091796875, "learning_rate": 6.403509915227491e-07, "loss": -0.0088, "num_tokens": 107391306.0, "reward": 7.450580596923828e-09, "reward_std": 1.050563097000122, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.004380850412775085, "rewards/wordcountpos_reward/raw_geo/std": 0.10373152187280874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12464765155042849, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1370.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1236.5625, "completions/mean_terminated_length": 1236.5625, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.49249849969994, "frac_reward_zero_std": 0.0, "grad_norm": 3.328738687739562, "kl": 0.01898193359375, "learning_rate": 6.400431392738775e-07, "loss": -0.0284, "num_tokens": 107440267.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7745208740234375, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.049779026964028715, "rewards/wordcountpos_reward/raw_geo/std": 0.09052608084600176, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1219.125, "completions/mean_terminated_length": 1091.45458984375, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.4926985397079416, "frac_reward_zero_std": 0.0, "grad_norm": 2.946468007156027, "kl": 0.0173492431640625, "learning_rate": 6.397352431195624e-07, "loss": 0.0129, "num_tokens": 107487445.0, "reward": 0.0, "reward_std": 1.0040605068206787, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10950365021867589, "rewards/wordcountpos_reward/raw_geo/std": 0.11634695834100878, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1471.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1125.4375, "completions/mean_terminated_length": 1125.4375, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.4928985797159432, "frac_reward_zero_std": 0.0, "grad_norm": 3.6179115618409576, "kl": 0.0177459716796875, "learning_rate": 6.394273032099352e-07, "loss": 0.0057, "num_tokens": 107528220.0, "reward": 0.0, "reward_std": 0.9868813753128052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0653137259991155, "rewards/wordcountpos_reward/raw_geo/std": 0.09864775376451819, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1158.0, "completions/mean_terminated_length": 1158.0, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.4930986197239448, "frac_reward_zero_std": 0.0, "grad_norm": 3.3025263848057707, "kl": 0.0163116455078125, "learning_rate": 6.391193196951489e-07, "loss": 0.0137, "num_tokens": 107566132.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8452931642532349, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0986428606101246, "rewards/wordcountpos_reward/raw_geo/std": 0.027661254114788793, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1256.75, "completions/mean_terminated_length": 1200.615478515625, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.49329865973194637, "frac_reward_zero_std": 0.0, "grad_norm": 2.536485359610273, "kl": 0.0121307373046875, "learning_rate": 6.388112927253777e-07, "loss": 0.0098, "num_tokens": 107606136.0, "reward": -3.725290298461914e-09, "reward_std": 0.9943126440048218, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.010622799288382162, "rewards/wordcountpos_reward/raw_geo/std": 0.10060908835549404, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1242.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 1053.8125, "completions/mean_terminated_length": 1053.8125, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.493498699739948, "frac_reward_zero_std": 0.0, "grad_norm": 3.431586775649576, "kl": 0.01702880859375, "learning_rate": 6.385032224508167e-07, "loss": 0.0037, "num_tokens": 107640077.0, "reward": -4.470348358154297e-08, "reward_std": 1.061139464378357, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.025684245429971893, "rewards/wordcountpos_reward/raw_geo/std": 0.08271487138021745, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1301.75, "completions/mean_terminated_length": 1301.75, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.4936987397479496, "frac_reward_zero_std": 0.0, "grad_norm": 3.3422675946522493, "kl": 0.0194091796875, "learning_rate": 6.381951090216828e-07, "loss": -0.004, "num_tokens": 107687849.0, "reward": 5.960464477539063e-08, "reward_std": 0.5935417413711548, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013028710457026802, "rewards/wordcountpos_reward/raw_geo/std": 0.10484475703817267, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1168.5625, "completions/mean_terminated_length": 1092.076904296875, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.4938987797559512, "frac_reward_zero_std": 0.0, "grad_norm": 3.019053722617368, "kl": 0.016448974609375, "learning_rate": 6.37886952588213e-07, "loss": -0.0097, "num_tokens": 107728306.0, "reward": 0.0, "reward_std": 0.953713059425354, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10889538560814847, "rewards/wordcountpos_reward/raw_geo/std": 0.21916851074227803, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1114.875, "completions/mean_terminated_length": 1114.875, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.4940988197639528, "frac_reward_zero_std": 0.0, "grad_norm": 3.0393703401148544, "kl": 0.015289306640625, "learning_rate": 6.375787533006663e-07, "loss": 0.0354, "num_tokens": 107775400.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4097752571105957, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06894876221845399, "rewards/wordcountpos_reward/raw_geo/std": 0.10158874078696135, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13817594795257457, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1226.25, "completions/mean_terminated_length": 1226.25, "completions/min_length": 1058.0, "completions/min_terminated_length": 1058.0, "epoch": 0.49429885977195437, "frac_reward_zero_std": 0.0, "grad_norm": 3.023266159287197, "kl": 0.01297760009765625, "learning_rate": 6.372705113093215e-07, "loss": -0.0153, "num_tokens": 107821820.0, "reward": 0.0, "reward_std": 0.5844358205795288, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10895630159444845, "rewards/wordcountpos_reward/raw_geo/std": 0.09798160771548806, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1465024333004847, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1260.375, "completions/mean_terminated_length": 1260.375, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.494498899779956, "frac_reward_zero_std": 0.0, "grad_norm": 3.1764984872287267, "kl": 0.0177001953125, "learning_rate": 6.369622267644792e-07, "loss": -0.0428, "num_tokens": 107872066.0, "reward": 1.4901161193847656e-08, "reward_std": 0.925586462020874, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24454880733031578, "rewards/wordcountpos_reward/raw_geo/std": 0.3834868807913282, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1092.875, "completions/mean_terminated_length": 1065.7333984375, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.4946989397879576, "frac_reward_zero_std": 0.0, "grad_norm": 3.4464024998058416, "kl": 0.02276611328125, "learning_rate": 6.366538998164604e-07, "loss": -0.0924, "num_tokens": 107922328.0, "reward": 0.0, "reward_std": 0.7486235499382019, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04677570681555917, "rewards/wordcountpos_reward/raw_geo/std": 0.08019824996105765, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1174.75, "completions/mean_terminated_length": 1066.3333740234375, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.4948989797959592, "frac_reward_zero_std": 0.0, "grad_norm": 2.759598401508637, "kl": 0.011932373046875, "learning_rate": 6.363455306156063e-07, "loss": 0.0596, "num_tokens": 107975268.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9172544479370117, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11111658298382501, "rewards/wordcountpos_reward/raw_geo/std": 0.13038411399091193, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1143.625, "completions/mean_terminated_length": 1143.625, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.4950990198039608, "frac_reward_zero_std": 0.0, "grad_norm": 3.023599068110238, "kl": 0.0138092041015625, "learning_rate": 6.360371193122798e-07, "loss": -0.0172, "num_tokens": 108013190.0, "reward": 7.450580596923828e-09, "reward_std": 1.0594513416290283, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1735414933987042, "rewards/wordcountpos_reward/raw_geo/std": 0.19715707734181126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1370.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1196.75, "completions/mean_terminated_length": 1196.75, "completions/min_length": 1040.0, "completions/min_terminated_length": 1040.0, "epoch": 0.49529905981196237, "frac_reward_zero_std": 0.0, "grad_norm": 3.037799318922153, "kl": 0.023895263671875, "learning_rate": 6.357286660568628e-07, "loss": -0.007, "num_tokens": 108050098.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7352159023284912, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.011233889771541297, "rewards/wordcountpos_reward/raw_geo/std": 0.07050412485151247, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1284.5625, "completions/mean_terminated_length": 1253.7857666015625, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.495499099819964, "frac_reward_zero_std": 0.0, "grad_norm": 3.2481690998485484, "kl": 0.0206298828125, "learning_rate": 6.354201709997592e-07, "loss": -0.0555, "num_tokens": 108103571.0, "reward": -5.960464477539063e-08, "reward_std": 0.5067373514175415, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06679104025656017, "rewards/wordcountpos_reward/raw_geo/std": 0.07106346237085676, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590965, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1367.375, "completions/mean_terminated_length": 1264.2222900390625, "completions/min_length": 1140.0, "completions/min_terminated_length": 1140.0, "epoch": 0.4956991398279656, "frac_reward_zero_std": 0.0, "grad_norm": 2.5976917124259526, "kl": 0.0142059326171875, "learning_rate": 6.351116342913923e-07, "loss": -0.031, "num_tokens": 108153609.0, "reward": -2.9802322387695312e-08, "reward_std": 0.37114232778549194, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06426513574036685, "rewards/wordcountpos_reward/raw_geo/std": 0.3410302217984672, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13270686158262923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1233.125, "completions/mean_terminated_length": 1215.3333740234375, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.4958991798359672, "frac_reward_zero_std": 0.0, "grad_norm": 2.9999157712304223, "kl": 0.0202789306640625, "learning_rate": 6.348030560822059e-07, "loss": 0.0071, "num_tokens": 108202851.0, "reward": 0.0, "reward_std": 0.668235182762146, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10616325783875966, "rewards/wordcountpos_reward/raw_geo/std": 0.11311148977226781, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1184.3125, "completions/mean_terminated_length": 1184.3125, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.4960992198439688, "frac_reward_zero_std": 0.0, "grad_norm": 2.7186677121367615, "kl": 0.017364501953125, "learning_rate": 6.344944365226644e-07, "loss": 0.0032, "num_tokens": 108240968.0, "reward": 0.0, "reward_std": 0.662962794303894, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.008670883534560282, "rewards/wordcountpos_reward/raw_geo/std": 0.07348415376164916, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 996.3125, "completions/mean_terminated_length": 962.7333984375, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.4962992598519704, "frac_reward_zero_std": 0.0, "grad_norm": 3.5127410498413747, "kl": 0.019012451171875, "learning_rate": 6.341857757632519e-07, "loss": 0.0198, "num_tokens": 108290237.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9373868703842163, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0725025978631159, "rewards/wordcountpos_reward/raw_geo/std": 0.07322611789684075, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14240006242195888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1446.625, "completions/mean_terminated_length": 1329.2000732421875, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.496499299859972, "frac_reward_zero_std": 0.0, "grad_norm": 2.2658778049380843, "kl": 0.0107879638671875, "learning_rate": 6.338770739544731e-07, "loss": -0.0318, "num_tokens": 108342135.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0504528284072876, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0775739760867907, "rewards/wordcountpos_reward/raw_geo/std": 0.08033865185020445, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1181.9375, "completions/mean_terminated_length": 1160.7333984375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.4966993398679736, "frac_reward_zero_std": 0.0, "grad_norm": 3.168046438739839, "kl": 0.0185546875, "learning_rate": 6.335683312468518e-07, "loss": 0.0111, "num_tokens": 108380686.0, "reward": 2.9802322387695312e-08, "reward_std": 0.3198065161705017, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.043655104219377704, "rewards/wordcountpos_reward/raw_geo/std": 0.09573248437656702, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1150.8125, "completions/mean_terminated_length": 1150.8125, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.4968993798759752, "frac_reward_zero_std": 0.0, "grad_norm": 3.0839195544275086, "kl": 0.0167083740234375, "learning_rate": 6.332595477909328e-07, "loss": -0.0282, "num_tokens": 108419827.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5800188183784485, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09609850310518664, "rewards/wordcountpos_reward/raw_geo/std": 0.12543750472520476, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1088.1875, "completions/mean_terminated_length": 1088.1875, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.4970994198839768, "frac_reward_zero_std": 0.0, "grad_norm": 3.506497568835943, "kl": 0.017242431640625, "learning_rate": 6.329507237372803e-07, "loss": -0.0226, "num_tokens": 108452606.0, "reward": 1.4901161193847656e-08, "reward_std": 0.996084988117218, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07540383562475708, "rewards/wordcountpos_reward/raw_geo/std": 0.04061627154280729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1011.5625, "completions/mean_terminated_length": 1011.5625, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.4972994598919784, "frac_reward_zero_std": 0.0, "grad_norm": 3.8569005166459194, "kl": 0.0171966552734375, "learning_rate": 6.326418592364784e-07, "loss": 0.0015, "num_tokens": 108494247.0, "reward": 0.0, "reward_std": 0.7294982671737671, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.008718763193570888, "rewards/wordcountpos_reward/raw_geo/std": 0.15132359694679695, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1049.125, "completions/mean_terminated_length": 1019.0667114257812, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.49749949989998, "frac_reward_zero_std": 0.0, "grad_norm": 2.533779759293995, "kl": 0.01251220703125, "learning_rate": 6.323329544391303e-07, "loss": -0.0744, "num_tokens": 108530305.0, "reward": 0.0, "reward_std": 0.7738298177719116, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.19667648018198738, "rewards/wordcountpos_reward/raw_geo/std": 0.2119421014780569, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1123.5, "completions/mean_terminated_length": 1123.5, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.4976995399079816, "frac_reward_zero_std": 0.0, "grad_norm": 3.4436269202722225, "kl": 0.0228271484375, "learning_rate": 6.320240094958601e-07, "loss": -0.0019, "num_tokens": 108567849.0, "reward": -5.960464477539063e-08, "reward_std": 0.9493604302406311, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15158108868532383, "rewards/wordcountpos_reward/raw_geo/std": 0.0818233194193613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1424.4375, "completions/mean_terminated_length": 1348.875, "completions/min_length": 1200.0, "completions/min_terminated_length": 1200.0, "epoch": 0.4978995799159832, "frac_reward_zero_std": 0.0, "grad_norm": 2.913078806512721, "kl": 0.0167083740234375, "learning_rate": 6.317150245573101e-07, "loss": 0.0078, "num_tokens": 108613296.0, "reward": 0.0, "reward_std": 0.6681642532348633, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04400319784863241, "rewards/wordcountpos_reward/raw_geo/std": 0.17399185392126348, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 990.0, "completions/mean_terminated_length": 990.0, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.49809961992398477, "frac_reward_zero_std": 0.0, "grad_norm": 2.90708411298445, "kl": 0.0162811279296875, "learning_rate": 6.314059997741432e-07, "loss": -0.034, "num_tokens": 108657896.0, "reward": 0.0, "reward_std": 0.797179102897644, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03366853173678948, "rewards/wordcountpos_reward/raw_geo/std": 0.08451496222297934, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1114.9375, "completions/mean_terminated_length": 1114.9375, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.4982996599319864, "frac_reward_zero_std": 0.0, "grad_norm": 3.6158858438791808, "kl": 0.021026611328125, "learning_rate": 6.310969352970413e-07, "loss": -0.0214, "num_tokens": 108699351.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0331881046295166, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04032456824255471, "rewards/wordcountpos_reward/raw_geo/std": 0.18482993519491328, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1262.125, "completions/mean_terminated_length": 1207.2308349609375, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.498499699939988, "frac_reward_zero_std": 0.0, "grad_norm": 3.0415976332798498, "kl": 0.01519775390625, "learning_rate": 6.307878312767053e-07, "loss": -0.022, "num_tokens": 108742617.0, "reward": 0.0, "reward_std": 0.8745841383934021, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06121874828318946, "rewards/wordcountpos_reward/raw_geo/std": 0.09397621043809605, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1178.75, "completions/mean_terminated_length": 1071.666748046875, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.4986997399479896, "frac_reward_zero_std": 0.0, "grad_norm": 2.6582381556737387, "kl": 0.0125885009765625, "learning_rate": 6.304786878638559e-07, "loss": 0.0233, "num_tokens": 108786013.0, "reward": 0.0, "reward_std": 0.5988030433654785, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.18455667153238225, "rewards/wordcountpos_reward/raw_geo/std": 0.07235178114264847, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1136.25, "completions/mean_terminated_length": 1136.25, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.4988997799559912, "frac_reward_zero_std": 0.0, "grad_norm": 2.9480218571984773, "kl": 0.0161590576171875, "learning_rate": 6.30169505209233e-07, "loss": -0.0156, "num_tokens": 108834169.0, "reward": -2.9802322387695312e-08, "reward_std": 0.862307071685791, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09174466792925474, "rewards/wordcountpos_reward/raw_geo/std": 0.06770351327726458, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505421, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1171.625, "completions/mean_terminated_length": 1171.625, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.4990998199639928, "frac_reward_zero_std": 0.0, "grad_norm": 3.1334006275481974, "kl": 0.0176544189453125, "learning_rate": 6.298602834635952e-07, "loss": -0.0227, "num_tokens": 108887683.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0190402269363403, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00920525863871537, "rewards/wordcountpos_reward/raw_geo/std": 0.041151925768494114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.15563490039905004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1042.3125, "completions/mean_terminated_length": 1011.800048828125, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.4992998599719944, "frac_reward_zero_std": 0.0, "grad_norm": 3.548877522521723, "kl": 0.0254058837890625, "learning_rate": 6.295510227777206e-07, "loss": 0.0084, "num_tokens": 108929008.0, "reward": 0.0, "reward_std": 0.6199061870574951, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0013388883203886643, "rewards/wordcountpos_reward/raw_geo/std": 0.06880967724261954, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1440.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1147.75, "completions/mean_terminated_length": 1147.75, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.499499899979996, "frac_reward_zero_std": 0.0, "grad_norm": 3.3260260671224007, "kl": 0.0167694091796875, "learning_rate": 6.292417233024062e-07, "loss": -0.0401, "num_tokens": 108966460.0, "reward": 1.4901161193847656e-08, "reward_std": 1.018096923828125, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08841434201477005, "rewards/wordcountpos_reward/raw_geo/std": 0.07038124583160071, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1146.625, "completions/mean_terminated_length": 1123.0667724609375, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.4996999399879976, "frac_reward_zero_std": 0.0, "grad_norm": 2.596482413754992, "kl": 0.013092041015625, "learning_rate": 6.289323851884673e-07, "loss": -0.0289, "num_tokens": 109012126.0, "reward": 0.0, "reward_std": 0.8268246650695801, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.045859252016932514, "rewards/wordcountpos_reward/raw_geo/std": 0.06861862792795599, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1849424334859464, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1276.4375, "completions/mean_terminated_length": 1174.8182373046875, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.4998999799959992, "frac_reward_zero_std": 0.0, "grad_norm": 2.7807365400392685, "kl": 0.016082763671875, "learning_rate": 6.286230085867392e-07, "loss": 0.0133, "num_tokens": 109068685.0, "reward": 2.60770320892334e-08, "reward_std": 1.037712812423706, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00548153520679225, "rewards/wordcountpos_reward/raw_geo/std": 0.06337079561768094, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1262.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 1068.6875, "completions/mean_terminated_length": 1068.6875, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.5001000200040008, "frac_reward_zero_std": 0.0, "grad_norm": 3.147209900074164, "kl": 0.0169525146484375, "learning_rate": 6.283135936480752e-07, "loss": 0.0283, "num_tokens": 109116656.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5377286672592163, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014213864696972296, "rewards/wordcountpos_reward/raw_geo/std": 0.02497317194977981, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1012.0625, "completions/mean_terminated_length": 1012.0625, "completions/min_length": 667.0, "completions/min_terminated_length": 667.0, "epoch": 0.5003000600120024, "frac_reward_zero_std": 0.0, "grad_norm": 3.861660811402539, "kl": 0.01910400390625, "learning_rate": 6.280041405233472e-07, "loss": -0.0402, "num_tokens": 109155641.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6625926494598389, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.23233136766252827, "rewards/wordcountpos_reward/raw_geo/std": 0.07869281719481191, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14548768561863465, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 979.25, "completions/mean_terminated_length": 979.25, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.500500100020004, "frac_reward_zero_std": 0.0, "grad_norm": 3.9147433192376804, "kl": 0.022064208984375, "learning_rate": 6.276946493634462e-07, "loss": -0.0503, "num_tokens": 109201085.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9099222421646118, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.024947442067552825, "rewards/wordcountpos_reward/raw_geo/std": 0.04721275260163974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.65, "rewards/wordcountpos_reward/raw_rule/std": 0.24645636680909816, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1387.5625, "completions/mean_terminated_length": 1336.45458984375, "completions/min_length": 1073.0, "completions/min_terminated_length": 1073.0, "epoch": 0.5007001400280056, "frac_reward_zero_std": 0.0, "grad_norm": 2.809124800589583, "kl": 0.017608642578125, "learning_rate": 6.273851203192812e-07, "loss": -0.0094, "num_tokens": 109249310.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5351749658584595, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02381302046926594, "rewards/wordcountpos_reward/raw_geo/std": 0.1556733756135652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1212.4375, "completions/mean_terminated_length": 1212.4375, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.5009001800360072, "frac_reward_zero_std": 0.0, "grad_norm": 2.68639296251914, "kl": 0.0168914794921875, "learning_rate": 6.270755535417802e-07, "loss": -0.0155, "num_tokens": 109295485.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5378513336181641, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.044983293839917546, "rewards/wordcountpos_reward/raw_geo/std": 0.120316803860058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1090.5, "completions/mean_terminated_length": 1032.0, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.5011002200440088, "frac_reward_zero_std": 0.0, "grad_norm": 3.492660713753773, "kl": 0.01776123046875, "learning_rate": 6.267659491818893e-07, "loss": -0.0004, "num_tokens": 109334157.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8980975151062012, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015440637575469644, "rewards/wordcountpos_reward/raw_geo/std": 0.08474807033827592, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 970.125, "completions/mean_terminated_length": 970.125, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.5013002600520104, "frac_reward_zero_std": 0.0, "grad_norm": 3.979804635162571, "kl": 0.0213623046875, "learning_rate": 6.26456307390573e-07, "loss": -0.0253, "num_tokens": 109385479.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7967420220375061, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04820118684177061, "rewards/wordcountpos_reward/raw_geo/std": 0.20128696616055114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1129.125, "completions/mean_terminated_length": 1104.4000244140625, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.501500300060012, "frac_reward_zero_std": 0.0, "grad_norm": 3.368111365026265, "kl": 0.01715087890625, "learning_rate": 6.261466283188141e-07, "loss": 0.0208, "num_tokens": 109426345.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8443211317062378, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.006133598534805865, "rewards/wordcountpos_reward/raw_geo/std": 0.09723855343132215, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1250.625, "completions/mean_terminated_length": 1193.0770263671875, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.5017003400680136, "frac_reward_zero_std": 0.0, "grad_norm": 3.5255216619396017, "kl": 0.019622802734375, "learning_rate": 6.258369121176132e-07, "loss": 0.0024, "num_tokens": 109471891.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0198285579681396, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.6435693507497399, "rewards/wordcountpos_reward/raw_geo/std": 0.029747185656347465, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1086.5, "completions/mean_terminated_length": 1086.5, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.5019003800760152, "frac_reward_zero_std": 0.0, "grad_norm": 2.7410388071385396, "kl": 0.0174102783203125, "learning_rate": 6.255271589379897e-07, "loss": 0.0146, "num_tokens": 109502363.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9537344574928284, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0009313556906833799, "rewards/wordcountpos_reward/raw_geo/std": 0.11095347883160525, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1177.6875, "completions/mean_terminated_length": 1156.2000732421875, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.5021004200840168, "frac_reward_zero_std": 0.0, "grad_norm": 3.4447622293053404, "kl": 0.021148681640625, "learning_rate": 6.252173689309805e-07, "loss": 0.0116, "num_tokens": 109549230.0, "reward": 3.725290298461914e-09, "reward_std": 1.0510153770446777, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.025424432689836962, "rewards/wordcountpos_reward/raw_geo/std": 0.14556150339911628, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1268.75, "completions/mean_terminated_length": 1253.3333740234375, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.5023004600920185, "frac_reward_zero_std": 0.0, "grad_norm": 3.083001806291637, "kl": 0.0162353515625, "learning_rate": 6.249075422476409e-07, "loss": 0.0118, "num_tokens": 109588730.0, "reward": 5.960464477539063e-08, "reward_std": 0.6213293075561523, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.058318020534471306, "rewards/wordcountpos_reward/raw_geo/std": 0.22285691695426743, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1103.625, "completions/mean_terminated_length": 1103.625, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.50250050010002, "frac_reward_zero_std": 0.0, "grad_norm": 3.5795414344381355, "kl": 0.020782470703125, "learning_rate": 6.245976790390434e-07, "loss": 0.0043, "num_tokens": 109629044.0, "reward": 0.0, "reward_std": 0.8230456113815308, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19390545724889657, "rewards/wordcountpos_reward/raw_geo/std": 0.11472201143654451, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1492.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1204.5625, "completions/mean_terminated_length": 1204.5625, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.5027005401080216, "frac_reward_zero_std": 0.0, "grad_norm": 2.5546112869811166, "kl": 0.01397705078125, "learning_rate": 6.24287779456279e-07, "loss": -0.0002, "num_tokens": 109671709.0, "reward": 0.0, "reward_std": 0.7331428527832031, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.047805509517596524, "rewards/wordcountpos_reward/raw_geo/std": 0.11245173276282035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1077.375, "completions/mean_terminated_length": 1077.375, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.5029005801160232, "frac_reward_zero_std": 0.0, "grad_norm": 3.4417739942106724, "kl": 0.019012451171875, "learning_rate": 6.239778436504557e-07, "loss": -0.0022, "num_tokens": 109720987.0, "reward": 0.0, "reward_std": 0.9875365495681763, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09574858449239325, "rewards/wordcountpos_reward/raw_geo/std": 0.09412547252143579, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 990.1875, "completions/mean_terminated_length": 990.1875, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.5031006201240248, "frac_reward_zero_std": 0.0, "grad_norm": 3.8431101790367808, "kl": 0.025238037109375, "learning_rate": 6.236678717727002e-07, "loss": 0.018, "num_tokens": 109766310.0, "reward": 0.0, "reward_std": 0.6766756772994995, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10588091895184247, "rewards/wordcountpos_reward/raw_geo/std": 0.1246314238666385, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12641788434189793, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1221.75, "completions/mean_terminated_length": 1203.2000732421875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.5033006601320265, "frac_reward_zero_std": 0.0, "grad_norm": 2.1884281421339495, "kl": 0.0146026611328125, "learning_rate": 6.233578639741558e-07, "loss": 0.0349, "num_tokens": 109809586.0, "reward": 0.0, "reward_std": 0.757026731967926, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061180947863952645, "rewards/wordcountpos_reward/raw_geo/std": 0.1624032232255344, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1093.875, "completions/mean_terminated_length": 1093.875, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.503500700140028, "frac_reward_zero_std": 0.0, "grad_norm": 3.2173706318632926, "kl": 0.0192108154296875, "learning_rate": 6.230478204059837e-07, "loss": 0.01, "num_tokens": 109850288.0, "reward": 0.0, "reward_std": 0.7763670682907104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.055647411424448276, "rewards/wordcountpos_reward/raw_geo/std": 0.07272849106657611, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1380.25, "completions/mean_terminated_length": 1260.5, "completions/min_length": 1087.0, "completions/min_terminated_length": 1087.0, "epoch": 0.5037007401480296, "frac_reward_zero_std": 0.0, "grad_norm": 2.4981253062288884, "kl": 0.0111236572265625, "learning_rate": 6.227377412193628e-07, "loss": -0.0035, "num_tokens": 109892308.0, "reward": -5.960464477539063e-08, "reward_std": 0.5685840249061584, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12725955247917134, "rewards/wordcountpos_reward/raw_geo/std": 0.05683281593391116, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1193.3125, "completions/mean_terminated_length": 1193.3125, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.5039007801560312, "frac_reward_zero_std": 0.0, "grad_norm": 2.915572984533304, "kl": 0.0149993896484375, "learning_rate": 6.22427626565489e-07, "loss": -0.0176, "num_tokens": 109932673.0, "reward": -7.450580596923828e-09, "reward_std": 1.0480660200119019, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.09353597401221414, "rewards/wordcountpos_reward/raw_geo/std": 0.10269195678131013, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1188.9375, "completions/mean_terminated_length": 1188.9375, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.5041008201640328, "frac_reward_zero_std": 0.0, "grad_norm": 3.045264807243928, "kl": 0.022491455078125, "learning_rate": 6.221174765955755e-07, "loss": -0.0193, "num_tokens": 109976424.0, "reward": 0.0, "reward_std": 0.7035437822341919, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0492841333451349, "rewards/wordcountpos_reward/raw_geo/std": 0.10896476935701346, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1107.0, "completions/mean_terminated_length": 1107.0, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.5043008601720345, "frac_reward_zero_std": 0.0, "grad_norm": 3.794602608655059, "kl": 0.02099609375, "learning_rate": 6.218072914608528e-07, "loss": -0.0235, "num_tokens": 110021544.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8670692443847656, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14270112883726957, "rewards/wordcountpos_reward/raw_geo/std": 0.23052279981218937, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1163.625, "completions/mean_terminated_length": 1163.625, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.504500900180036, "frac_reward_zero_std": 0.0, "grad_norm": 3.1409954180506636, "kl": 0.0184326171875, "learning_rate": 6.214970713125691e-07, "loss": -0.0413, "num_tokens": 110075314.0, "reward": -3.725290298461914e-08, "reward_std": 1.013209342956543, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18479645450680374, "rewards/wordcountpos_reward/raw_geo/std": 0.0803648413684936, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869927, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1112.9375, "completions/mean_terminated_length": 1112.9375, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.5047009401880376, "frac_reward_zero_std": 0.0, "grad_norm": 2.793584394311455, "kl": 0.0152587890625, "learning_rate": 6.211868163019885e-07, "loss": 0.008, "num_tokens": 110125825.0, "reward": 0.0, "reward_std": 0.7968648076057434, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05184336906004493, "rewards/wordcountpos_reward/raw_geo/std": 0.08751034536333821, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 955.8125, "completions/mean_terminated_length": 955.8125, "completions/min_length": 548.0, "completions/min_terminated_length": 548.0, "epoch": 0.5049009801960392, "frac_reward_zero_std": 0.0, "grad_norm": 3.3605314853699397, "kl": 0.0146484375, "learning_rate": 6.208765265803932e-07, "loss": 0.0443, "num_tokens": 110159854.0, "reward": 0.0, "reward_std": 1.0086933374404907, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05791642938762856, "rewards/wordcountpos_reward/raw_geo/std": 0.05497362002274709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1393.0625, "completions/mean_terminated_length": 1344.45458984375, "completions/min_length": 1144.0, "completions/min_terminated_length": 1144.0, "epoch": 0.5051010202040408, "frac_reward_zero_std": 0.0, "grad_norm": 3.0412542477291216, "kl": 0.017486572265625, "learning_rate": 6.205662022990814e-07, "loss": -0.0102, "num_tokens": 110217079.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0217039585113525, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.20880304421172882, "rewards/wordcountpos_reward/raw_geo/std": 0.11219692688269206, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1205.1875, "completions/mean_terminated_length": 1185.533447265625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.5053010602120425, "frac_reward_zero_std": 0.0, "grad_norm": 3.3510794391654746, "kl": 0.0185546875, "learning_rate": 6.202558436093691e-07, "loss": 0.0057, "num_tokens": 110263018.0, "reward": 0.0, "reward_std": 0.9271669387817383, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010447106761236183, "rewards/wordcountpos_reward/raw_geo/std": 0.03385634409920289, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 943.1875, "completions/mean_terminated_length": 943.1875, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.505501100220044, "frac_reward_zero_std": 0.0, "grad_norm": 3.3992179925272223, "kl": 0.0181121826171875, "learning_rate": 6.199454506625886e-07, "loss": -0.0008, "num_tokens": 110313365.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9731029272079468, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0029216073909058716, "rewards/wordcountpos_reward/raw_geo/std": 0.09256699177012101, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1216.5, "completions/mean_terminated_length": 1122.0, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.5057011402280456, "frac_reward_zero_std": 0.0, "grad_norm": 2.9460497280467637, "kl": 0.018341064453125, "learning_rate": 6.196350236100885e-07, "loss": -0.0141, "num_tokens": 110356405.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0117175579071045, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14814516100847044, "rewards/wordcountpos_reward/raw_geo/std": 0.08921317808119208, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1115.6875, "completions/mean_terminated_length": 1115.6875, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.5059011802360472, "frac_reward_zero_std": 0.0, "grad_norm": 3.3516478241063226, "kl": 0.0194091796875, "learning_rate": 6.193245626032347e-07, "loss": -0.0132, "num_tokens": 110403544.0, "reward": -2.9802322387695312e-08, "reward_std": 0.641024649143219, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.003352983256501003, "rewards/wordcountpos_reward/raw_geo/std": 0.07796163209991214, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1166.875, "completions/mean_terminated_length": 1119.2857666015625, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.5061012202440488, "frac_reward_zero_std": 0.0, "grad_norm": 2.9645075185364775, "kl": 0.0140838623046875, "learning_rate": 6.190140677934096e-07, "loss": -0.0121, "num_tokens": 110451598.0, "reward": 0.0, "reward_std": 0.6043984889984131, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06327335696925322, "rewards/wordcountpos_reward/raw_geo/std": 0.05146430246213766, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901858, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1083.4375, "completions/mean_terminated_length": 1083.4375, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.5063012602520504, "frac_reward_zero_std": 0.0, "grad_norm": 2.9122946948700403, "kl": 0.0152587890625, "learning_rate": 6.187035393320113e-07, "loss": 0.0004, "num_tokens": 110501117.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8311837315559387, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0941715028034634, "rewards/wordcountpos_reward/raw_geo/std": 0.14677333145114627, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1271.1875, "completions/mean_terminated_length": 1255.933349609375, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.506501300260052, "frac_reward_zero_std": 0.0, "grad_norm": 2.4326383209891222, "kl": 0.011474609375, "learning_rate": 6.183929773704555e-07, "loss": -0.0311, "num_tokens": 110541416.0, "reward": 0.0, "reward_std": 0.9023693799972534, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05047571670575422, "rewards/wordcountpos_reward/raw_geo/std": 0.08143760285039134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1120.5, "completions/mean_terminated_length": 1095.2000732421875, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.5067013402680536, "frac_reward_zero_std": 0.0, "grad_norm": 3.142546844861848, "kl": 0.01605224609375, "learning_rate": 6.180823820601732e-07, "loss": -0.0124, "num_tokens": 110583664.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8890565037727356, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10559684314720308, "rewards/wordcountpos_reward/raw_geo/std": 0.22324350416683236, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1264.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 1020.3125, "completions/mean_terminated_length": 1020.3125, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.5069013802760552, "frac_reward_zero_std": 0.0, "grad_norm": 3.5233647115653994, "kl": 0.019012451171875, "learning_rate": 6.177717535526125e-07, "loss": -0.0191, "num_tokens": 110615757.0, "reward": 0.0, "reward_std": 0.961266040802002, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18651098024139878, "rewards/wordcountpos_reward/raw_geo/std": 0.09373313094399914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1216.3125, "completions/mean_terminated_length": 1197.4000244140625, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.5071014202840568, "frac_reward_zero_std": 0.0, "grad_norm": 2.920427341167993, "kl": 0.0180511474609375, "learning_rate": 6.174610919992369e-07, "loss": -0.0458, "num_tokens": 110670122.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7148938179016113, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010705760447967255, "rewards/wordcountpos_reward/raw_geo/std": 0.046937312786995276, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1072.5, "completions/mean_terminated_length": 1044.0, "completions/min_length": 748.0, "completions/min_terminated_length": 748.0, "epoch": 0.5073014602920584, "frac_reward_zero_std": 0.0, "grad_norm": 2.5521228161192306, "kl": 0.013153076171875, "learning_rate": 6.171503975515265e-07, "loss": -0.0256, "num_tokens": 110710714.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8038941621780396, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05113731124492844, "rewards/wordcountpos_reward/raw_geo/std": 0.11708499972260142, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1027.9375, "completions/mean_terminated_length": 1027.9375, "completions/min_length": 724.0, "completions/min_terminated_length": 724.0, "epoch": 0.50750150030006, "frac_reward_zero_std": 0.0, "grad_norm": 3.965123061294292, "kl": 0.020416259765625, "learning_rate": 6.168396703609771e-07, "loss": 0.0073, "num_tokens": 110748217.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9345406889915466, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10876839617915278, "rewards/wordcountpos_reward/raw_geo/std": 0.06019401700732533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 1356.875, "completions/mean_terminated_length": 1172.857177734375, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.5077015403080616, "frac_reward_zero_std": 0.0, "grad_norm": 2.7204377940006506, "kl": 0.011474609375, "learning_rate": 6.165289105791014e-07, "loss": -0.0302, "num_tokens": 110792695.0, "reward": 0.0, "reward_std": 0.9928556680679321, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04756643720058519, "rewards/wordcountpos_reward/raw_geo/std": 0.0593340504930322, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1164283279771532, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1264.625, "completions/mean_terminated_length": 1123.4000244140625, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.5079015803160633, "frac_reward_zero_std": 0.0, "grad_norm": 2.866050564816367, "kl": 0.017852783203125, "learning_rate": 6.162181183574264e-07, "loss": 0.0198, "num_tokens": 110845449.0, "reward": 0.0, "reward_std": 0.7089917659759521, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10409704706004666, "rewards/wordcountpos_reward/raw_geo/std": 0.07397681607639504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1100.3125, "completions/mean_terminated_length": 1100.3125, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.5081016203240648, "frac_reward_zero_std": 0.0, "grad_norm": 3.2208991200413313, "kl": 0.0166778564453125, "learning_rate": 6.159072938474964e-07, "loss": -0.0105, "num_tokens": 110887118.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9853053092956543, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10914637343118674, "rewards/wordcountpos_reward/raw_geo/std": 0.04643489068969486, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1124.375, "completions/mean_terminated_length": 1099.3333740234375, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.5083016603320664, "frac_reward_zero_std": 0.0, "grad_norm": 2.5174650539649233, "kl": 0.018402099609375, "learning_rate": 6.155964372008706e-07, "loss": -0.0266, "num_tokens": 110928684.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9616243243217468, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0011908683312844748, "rewards/wordcountpos_reward/raw_geo/std": 0.2194269780863561, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1170.1875, "completions/mean_terminated_length": 1170.1875, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.508501700340068, "frac_reward_zero_std": 0.0, "grad_norm": 2.9154559731953844, "kl": 0.0152435302734375, "learning_rate": 6.152855485691241e-07, "loss": -0.0013, "num_tokens": 110966199.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6932481527328491, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.039141379785914, "rewards/wordcountpos_reward/raw_geo/std": 0.11683242454793914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 1156.5, "completions/mean_terminated_length": 1107.4285888671875, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.5087017403480696, "frac_reward_zero_std": 0.0, "grad_norm": 3.146945289139845, "kl": 0.01910400390625, "learning_rate": 6.149746281038477e-07, "loss": -0.0081, "num_tokens": 111001975.0, "reward": 0.0, "reward_std": 0.9428539276123047, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10733963737733443, "rewards/wordcountpos_reward/raw_geo/std": 0.0850873900128895, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1230.5, "completions/mean_terminated_length": 1192.0, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.5089017803560713, "frac_reward_zero_std": 0.0, "grad_norm": 3.327864033574997, "kl": 0.033111572265625, "learning_rate": 6.146636759566476e-07, "loss": 0.0272, "num_tokens": 111034255.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9917442798614502, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12090984288871959, "rewards/wordcountpos_reward/raw_geo/std": 0.18513510098508826, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316811, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1367.5625, "completions/mean_terminated_length": 1307.3636474609375, "completions/min_length": 1053.0, "completions/min_terminated_length": 1053.0, "epoch": 0.5091018203640728, "frac_reward_zero_std": 0.0, "grad_norm": 3.130266063087792, "kl": 0.019989013671875, "learning_rate": 6.143526922791454e-07, "loss": -0.0048, "num_tokens": 111089320.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8086868524551392, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.138367984148557, "rewards/wordcountpos_reward/raw_geo/std": 0.1424647630492762, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1259.5625, "completions/mean_terminated_length": 1243.533447265625, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.5093018603720744, "frac_reward_zero_std": 0.0, "grad_norm": 2.3388709558919025, "kl": 0.0143280029296875, "learning_rate": 6.140416772229784e-07, "loss": -0.0056, "num_tokens": 111127369.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9993166327476501, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05917640285856665, "rewards/wordcountpos_reward/raw_geo/std": 0.14323684784347787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1145.75, "completions/mean_terminated_length": 1122.1334228515625, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.509501900380076, "frac_reward_zero_std": 0.0, "grad_norm": 3.2984065132597116, "kl": 0.0169219970703125, "learning_rate": 6.137306309397986e-07, "loss": 0.0006, "num_tokens": 111167565.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9466355443000793, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.025589477193147126, "rewards/wordcountpos_reward/raw_geo/std": 0.11361405373496875, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1180.4375, "completions/mean_terminated_length": 1159.1334228515625, "completions/min_length": 611.0, "completions/min_terminated_length": 611.0, "epoch": 0.5097019403880776, "frac_reward_zero_std": 0.0, "grad_norm": 3.5167232163135727, "kl": 0.02069091796875, "learning_rate": 6.134195535812738e-07, "loss": -0.1024, "num_tokens": 111226452.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9937956929206848, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16053562102900165, "rewards/wordcountpos_reward/raw_geo/std": 0.07556132511976268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7708333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.2644000896509065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1159.4375, "completions/mean_terminated_length": 1136.7333984375, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.5099019803960793, "frac_reward_zero_std": 0.0, "grad_norm": 2.875917213162287, "kl": 0.018035888671875, "learning_rate": 6.131084452990867e-07, "loss": -0.0142, "num_tokens": 111274803.0, "reward": -4.470348358154297e-08, "reward_std": 0.8856914639472961, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.030574028830521096, "rewards/wordcountpos_reward/raw_geo/std": 0.06174289596922741, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.12292725943057184, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1337.3125, "completions/mean_terminated_length": 1263.3636474609375, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.5101020204040808, "frac_reward_zero_std": 0.0, "grad_norm": 3.0145144398876513, "kl": 0.017608642578125, "learning_rate": 6.127973062449351e-07, "loss": 0.0293, "num_tokens": 111321480.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0215834379196167, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.059388666352573526, "rewards/wordcountpos_reward/raw_geo/std": 0.19556411243928354, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1204.0, "completions/max_terminated_length": 1204.0, "completions/mean_length": 1045.375, "completions/mean_terminated_length": 1045.375, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.5103020604120824, "frac_reward_zero_std": 0.0, "grad_norm": 3.4764369006962155, "kl": 0.01953125, "learning_rate": 6.124861365705319e-07, "loss": 0.0089, "num_tokens": 111360078.0, "reward": 2.9802322387695312e-08, "reward_std": 0.648969292640686, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1289904671688676, "rewards/wordcountpos_reward/raw_geo/std": 0.1597658849135179, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869927, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1039.9375, "completions/mean_terminated_length": 1039.9375, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.510502100420084, "frac_reward_zero_std": 0.0, "grad_norm": 3.0329970301841587, "kl": 0.039825439453125, "learning_rate": 6.121749364276046e-07, "loss": -0.0319, "num_tokens": 111403229.0, "reward": 0.0, "reward_std": 0.7396513819694519, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18135363466645923, "rewards/wordcountpos_reward/raw_geo/std": 0.080965995302249, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567835, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1102.3125, "completions/mean_terminated_length": 1075.800048828125, "completions/min_length": 660.0, "completions/min_terminated_length": 660.0, "epoch": 0.5107021404280856, "frac_reward_zero_std": 0.0, "grad_norm": 3.0013890800550778, "kl": 0.014434814453125, "learning_rate": 6.118637059678962e-07, "loss": -0.0424, "num_tokens": 111457546.0, "reward": 0.0, "reward_std": 0.9087873697280884, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09171314002927793, "rewards/wordcountpos_reward/raw_geo/std": 0.17463981481583607, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1120.8125, "completions/mean_terminated_length": 1120.8125, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.5109021804360873, "frac_reward_zero_std": 0.0, "grad_norm": 2.6275526328799765, "kl": 0.0202178955078125, "learning_rate": 6.115524453431636e-07, "loss": -0.0052, "num_tokens": 111506047.0, "reward": 2.9802322387695312e-08, "reward_std": 0.670314371585846, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1264161065720459, "rewards/wordcountpos_reward/raw_geo/std": 0.0817821650890573, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1255.375, "completions/mean_terminated_length": 1255.375, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.5111022204440888, "frac_reward_zero_std": 0.0, "grad_norm": 2.379926952229829, "kl": 0.012298583984375, "learning_rate": 6.112411547051794e-07, "loss": -0.0193, "num_tokens": 111552917.0, "reward": 0.0, "reward_std": 0.9524004459381104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05370629003361451, "rewards/wordcountpos_reward/raw_geo/std": 0.34105716756275567, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1048.0625, "completions/mean_terminated_length": 1048.0625, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.5113022604520904, "frac_reward_zero_std": 0.0, "grad_norm": 1.1468616705590113, "kl": 0.0032100677490234375, "learning_rate": 6.109298342057299e-07, "loss": -0.0059, "num_tokens": 111585678.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7334751486778259, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024104137414469012, "rewards/wordcountpos_reward/raw_geo/std": 0.031843002701554615, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1464.5625, "completions/mean_terminated_length": 1437.0, "completions/min_length": 1309.0, "completions/min_terminated_length": 1309.0, "epoch": 0.511502300460092, "frac_reward_zero_std": 0.0, "grad_norm": 2.5870956448082225, "kl": 0.0126495361328125, "learning_rate": 6.106184839966167e-07, "loss": -0.0008, "num_tokens": 111642639.0, "reward": 0.0, "reward_std": 0.6141958832740784, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08110166640041828, "rewards/wordcountpos_reward/raw_geo/std": 0.06076625103993311, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1078.3125, "completions/mean_terminated_length": 1078.3125, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.5117023404680936, "frac_reward_zero_std": 0.0, "grad_norm": 2.9804828077253545, "kl": 0.019439697265625, "learning_rate": 6.103071042296551e-07, "loss": -0.0171, "num_tokens": 111691940.0, "reward": 0.0, "reward_std": 1.0468745231628418, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05569785609725945, "rewards/wordcountpos_reward/raw_geo/std": 0.08759057711736264, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316811, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1124.9375, "completions/mean_terminated_length": 1124.9375, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.5119023804760953, "frac_reward_zero_std": 0.0, "grad_norm": 3.348898883746759, "kl": 0.0190582275390625, "learning_rate": 6.09995695056676e-07, "loss": -0.0027, "num_tokens": 111737987.0, "reward": 0.0, "reward_std": 0.8639980554580688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03657214274508321, "rewards/wordcountpos_reward/raw_geo/std": 0.07642936148151157, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1159.5, "completions/mean_terminated_length": 1159.5, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.5121024204840968, "frac_reward_zero_std": 0.0, "grad_norm": 3.2684079451346717, "kl": 0.019073486328125, "learning_rate": 6.096842566295234e-07, "loss": 0.0095, "num_tokens": 111773619.0, "reward": 0.0, "reward_std": 1.0381957292556763, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.214813899477218, "rewards/wordcountpos_reward/raw_geo/std": 0.16598812200476498, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1237.3125, "completions/mean_terminated_length": 1237.3125, "completions/min_length": 1095.0, "completions/min_terminated_length": 1095.0, "epoch": 0.5123024604920984, "frac_reward_zero_std": 0.0, "grad_norm": 2.8782863655091426, "kl": 0.0132904052734375, "learning_rate": 6.093727891000562e-07, "loss": -0.0211, "num_tokens": 111819224.0, "reward": -1.4901161193847656e-08, "reward_std": 1.012759804725647, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.023186942970012548, "rewards/wordcountpos_reward/raw_geo/std": 0.10217886114225919, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1098.0, "completions/mean_terminated_length": 1098.0, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.5125025005001, "frac_reward_zero_std": 0.0, "grad_norm": 3.153313032408992, "kl": 0.0160369873046875, "learning_rate": 6.090612926201476e-07, "loss": -0.0126, "num_tokens": 111864168.0, "reward": 0.0, "reward_std": 0.6521433591842651, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03236499048573532, "rewards/wordcountpos_reward/raw_geo/std": 0.14120200779422176, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1567612007930345, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 899.25, "completions/mean_terminated_length": 899.25, "completions/min_length": 590.0, "completions/min_terminated_length": 590.0, "epoch": 0.5127025405081016, "frac_reward_zero_std": 0.0, "grad_norm": 4.034415482422827, "kl": 0.020660400390625, "learning_rate": 6.087497673416844e-07, "loss": -0.0565, "num_tokens": 111906476.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7674745321273804, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2481490722431411, "rewards/wordcountpos_reward/raw_geo/std": 0.35978877773574924, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1117.0625, "completions/mean_terminated_length": 1062.357177734375, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.5129025805161033, "frac_reward_zero_std": 0.0, "grad_norm": 2.820725906316868, "kl": 0.0138702392578125, "learning_rate": 6.084382134165679e-07, "loss": 0.0052, "num_tokens": 111959517.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0006263256072998, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.057281690686209204, "rewards/wordcountpos_reward/raw_geo/std": 0.11682690121937461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1183.0, "completions/max_terminated_length": 1183.0, "completions/mean_length": 1000.625, "completions/mean_terminated_length": 1000.625, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.5131026205241048, "frac_reward_zero_std": 0.0, "grad_norm": 3.3981813279893505, "kl": 0.0159759521484375, "learning_rate": 6.081266309967132e-07, "loss": -0.0299, "num_tokens": 111990735.0, "reward": 0.0, "reward_std": 0.9852697849273682, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05924708704105534, "rewards/wordcountpos_reward/raw_geo/std": 0.06852662652531337, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1152.3125, "completions/mean_terminated_length": 1129.1334228515625, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.5133026605321064, "frac_reward_zero_std": 0.0, "grad_norm": 3.1236552003752105, "kl": 0.0151824951171875, "learning_rate": 6.078150202340493e-07, "loss": -0.0227, "num_tokens": 112037892.0, "reward": 0.0, "reward_std": 0.7709812521934509, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014867699197621629, "rewards/wordcountpos_reward/raw_geo/std": 0.10877699823608235, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.1803289175881631, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1172.125, "completions/mean_terminated_length": 1172.125, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.513502700540108, "frac_reward_zero_std": 0.0, "grad_norm": 2.730953043923854, "kl": 0.013458251953125, "learning_rate": 6.075033812805189e-07, "loss": 0.0089, "num_tokens": 112082958.0, "reward": 0.0, "reward_std": 0.7064900398254395, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04572678949345872, "rewards/wordcountpos_reward/raw_geo/std": 0.08038008279349995, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11287488977066928, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1297.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 1100.8125, "completions/mean_terminated_length": 1100.8125, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.5137027405481096, "frac_reward_zero_std": 0.0, "grad_norm": 2.8794499013609336, "kl": 0.0171966552734375, "learning_rate": 6.071917142880788e-07, "loss": -0.0016, "num_tokens": 112121867.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8379423022270203, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013031755496874948, "rewards/wordcountpos_reward/raw_geo/std": 0.0980011217388431, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1111.5625, "completions/mean_terminated_length": 1111.5625, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.5139027805561113, "frac_reward_zero_std": 0.0, "grad_norm": 3.343773927415282, "kl": 0.0164031982421875, "learning_rate": 6.068800194086989e-07, "loss": -0.027, "num_tokens": 112159660.0, "reward": -3.5390257835388184e-08, "reward_std": 0.9403839111328125, "rewards/wordcountpos_reward/mean": -3.5390257835388184e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13326955932888923, "rewards/wordcountpos_reward/raw_geo/std": 0.2041007276950269, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1189.375, "completions/mean_terminated_length": 1189.375, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.5141028205641128, "frac_reward_zero_std": 0.0, "grad_norm": 3.0154786958492004, "kl": 0.018646240234375, "learning_rate": 6.065682967943634e-07, "loss": -0.003, "num_tokens": 112202186.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8912142515182495, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0297101013967409, "rewards/wordcountpos_reward/raw_geo/std": 0.19895145866092856, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1225.75, "completions/mean_terminated_length": 1225.75, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.5143028605721144, "frac_reward_zero_std": 0.0, "grad_norm": 2.346711132305472, "kl": 0.0135040283203125, "learning_rate": 6.062565465970695e-07, "loss": 0.0003, "num_tokens": 112242870.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6920846104621887, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04441434966064999, "rewards/wordcountpos_reward/raw_geo/std": 0.10365322435759344, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1084.875, "completions/mean_terminated_length": 1084.875, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.514502900580116, "frac_reward_zero_std": 0.0, "grad_norm": 3.364611613971905, "kl": 0.02032470703125, "learning_rate": 6.059447689688281e-07, "loss": 0.0335, "num_tokens": 112293364.0, "reward": 0.0, "reward_std": 0.7884864807128906, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07731106264371035, "rewards/wordcountpos_reward/raw_geo/std": 0.1783750707163222, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1322.8125, "completions/mean_terminated_length": 1297.5, "completions/min_length": 1135.0, "completions/min_terminated_length": 1135.0, "epoch": 0.5147029405881176, "frac_reward_zero_std": 0.0, "grad_norm": 3.0548630511167043, "kl": 0.016693115234375, "learning_rate": 6.056329640616632e-07, "loss": -0.0216, "num_tokens": 112342273.0, "reward": 0.0, "reward_std": 0.9606549143791199, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07466138496802287, "rewards/wordcountpos_reward/raw_geo/std": 0.22392748668545961, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1128.375, "completions/mean_terminated_length": 1128.375, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.5149029805961193, "frac_reward_zero_std": 0.0, "grad_norm": 3.330389280456008, "kl": 0.01873779296875, "learning_rate": 6.053211320276124e-07, "loss": -0.0093, "num_tokens": 112384295.0, "reward": 0.0, "reward_std": 0.9887303113937378, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05310390497928168, "rewards/wordcountpos_reward/raw_geo/std": 0.07801143072814991, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.077817450199525, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 975.5625, "completions/mean_terminated_length": 975.5625, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.5151030206041208, "frac_reward_zero_std": 0.0, "grad_norm": 3.1029619303913694, "kl": 0.0133056640625, "learning_rate": 6.050092730187265e-07, "loss": -0.0163, "num_tokens": 112427296.0, "reward": 0.0, "reward_std": 1.0387167930603027, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10978072491541191, "rewards/wordcountpos_reward/raw_geo/std": 0.11705219246884783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1223.5625, "completions/mean_terminated_length": 1205.1334228515625, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.5153030606121224, "frac_reward_zero_std": 0.0, "grad_norm": 2.6726469509920685, "kl": 0.0126953125, "learning_rate": 6.046973871870692e-07, "loss": 0.0092, "num_tokens": 112471433.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0274410247802734, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0005379985334809578, "rewards/wordcountpos_reward/raw_geo/std": 0.09200469917429181, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1292.4375, "completions/mean_terminated_length": 1292.4375, "completions/min_length": 1086.0, "completions/min_terminated_length": 1086.0, "epoch": 0.515503100620124, "frac_reward_zero_std": 0.0, "grad_norm": 2.8084168298420336, "kl": 0.015472412109375, "learning_rate": 6.043854746847175e-07, "loss": 0.0098, "num_tokens": 112524552.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8860631585121155, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07199027619080359, "rewards/wordcountpos_reward/raw_geo/std": 0.07731395970857889, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1100.6875, "completions/mean_terminated_length": 1100.6875, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.5157031406281256, "frac_reward_zero_std": 0.0, "grad_norm": 2.767603597452279, "kl": 0.01422119140625, "learning_rate": 6.040735356637615e-07, "loss": -0.0066, "num_tokens": 112564171.0, "reward": 0.0, "reward_std": 0.9031015634536743, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.019982058938088087, "rewards/wordcountpos_reward/raw_geo/std": 0.19126650342250975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1270.5625, "completions/mean_terminated_length": 1237.7857666015625, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.5159031806361273, "frac_reward_zero_std": 0.0, "grad_norm": 3.0413863342452725, "kl": 0.01751708984375, "learning_rate": 6.03761570276304e-07, "loss": 0.0316, "num_tokens": 112615180.0, "reward": 0.0, "reward_std": 0.9037442803382874, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.016122923832147557, "rewards/wordcountpos_reward/raw_geo/std": 0.11602464192136108, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1273.0, "completions/max_terminated_length": 1273.0, "completions/mean_length": 1121.5625, "completions/mean_terminated_length": 1121.5625, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.5161032206441288, "frac_reward_zero_std": 0.0, "grad_norm": 3.658147623267037, "kl": 0.02142333984375, "learning_rate": 6.034495786744605e-07, "loss": 0.0148, "num_tokens": 112657293.0, "reward": 1.4901161193847656e-08, "reward_std": 0.969143271446228, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11181550563787179, "rewards/wordcountpos_reward/raw_geo/std": 0.0910616749193732, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1036.8125, "completions/mean_terminated_length": 1036.8125, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.5163032606521304, "frac_reward_zero_std": 0.0, "grad_norm": 2.563223425455987, "kl": 0.01202392578125, "learning_rate": 6.031375610103599e-07, "loss": -0.0287, "num_tokens": 112689906.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0508956909179688, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1072008737927809, "rewards/wordcountpos_reward/raw_geo/std": 0.09561524676787576, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 993.3125, "completions/mean_terminated_length": 993.3125, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.5165033006601321, "frac_reward_zero_std": 0.0, "grad_norm": 4.027634544460468, "kl": 0.022796630859375, "learning_rate": 6.028255174361433e-07, "loss": 0.0268, "num_tokens": 112738967.0, "reward": 0.0, "reward_std": 0.9060863852500916, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06873580021534711, "rewards/wordcountpos_reward/raw_geo/std": 0.06249677131171885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0958393717904348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1471.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 905.375, "completions/mean_terminated_length": 905.375, "completions/min_length": 660.0, "completions/min_terminated_length": 660.0, "epoch": 0.5167033406681336, "frac_reward_zero_std": 0.0, "grad_norm": 3.0939281921883626, "kl": 0.016876220703125, "learning_rate": 6.025134481039643e-07, "loss": -0.0165, "num_tokens": 112785285.0, "reward": 2.9802322387695312e-08, "reward_std": 0.575707197189331, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04704228896047749, "rewards/wordcountpos_reward/raw_geo/std": 0.2992898806524493, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1202.0, "completions/mean_terminated_length": 1182.1334228515625, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.5169033806761353, "frac_reward_zero_std": 0.0, "grad_norm": 3.264938072705864, "kl": 0.015594482421875, "learning_rate": 6.022013531659897e-07, "loss": -0.0214, "num_tokens": 112829997.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7314637899398804, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04254829338464556, "rewards/wordcountpos_reward/raw_geo/std": 0.10833032703567425, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027816, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1190.3125, "completions/mean_terminated_length": 1169.666748046875, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.5171034206841368, "frac_reward_zero_std": 0.0, "grad_norm": 3.1459680628514155, "kl": 0.0175323486328125, "learning_rate": 6.018892327743982e-07, "loss": -0.0345, "num_tokens": 112873178.0, "reward": 0.0, "reward_std": 0.9577480554580688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06675986875368829, "rewards/wordcountpos_reward/raw_geo/std": 0.07501021862038029, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1161.75, "completions/mean_terminated_length": 1139.2000732421875, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.5173034606921384, "frac_reward_zero_std": 0.0, "grad_norm": 3.4342542322044225, "kl": 0.0153961181640625, "learning_rate": 6.015770870813813e-07, "loss": -0.0098, "num_tokens": 112915486.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8476356267929077, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01682292181364574, "rewards/wordcountpos_reward/raw_geo/std": 0.03448248578463858, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1158.8125, "completions/mean_terminated_length": 1136.0667724609375, "completions/min_length": 988.0, "completions/min_terminated_length": 988.0, "epoch": 0.5175035007001401, "frac_reward_zero_std": 0.0, "grad_norm": 2.856989856542253, "kl": 0.0145263671875, "learning_rate": 6.012649162391425e-07, "loss": 0.0311, "num_tokens": 112957435.0, "reward": 0.0, "reward_std": 0.6668420433998108, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14205713115224625, "rewards/wordcountpos_reward/raw_geo/std": 0.08288884977251902, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1375.6875, "completions/mean_terminated_length": 1334.25, "completions/min_length": 1225.0, "completions/min_terminated_length": 1225.0, "epoch": 0.5177035407081416, "frac_reward_zero_std": 0.0, "grad_norm": 3.0191118739918745, "kl": 0.0184326171875, "learning_rate": 6.009527203998977e-07, "loss": -0.0161, "num_tokens": 113006918.0, "reward": 0.0, "reward_std": 0.7953832745552063, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10526013042458188, "rewards/wordcountpos_reward/raw_geo/std": 0.10696155572822025, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 1045.3125, "completions/mean_terminated_length": 1045.3125, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.5179035807161432, "frac_reward_zero_std": 0.0, "grad_norm": 3.258302873341528, "kl": 0.021636962890625, "learning_rate": 6.00640499715875e-07, "loss": -0.0489, "num_tokens": 113053299.0, "reward": 0.0, "reward_std": 0.9370324611663818, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06068539125180551, "rewards/wordcountpos_reward/raw_geo/std": 0.10325444328155688, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1281.6875, "completions/mean_terminated_length": 1281.6875, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.5181036207241448, "frac_reward_zero_std": 0.0, "grad_norm": 2.08318963620492, "kl": 0.01031494140625, "learning_rate": 6.003282543393148e-07, "loss": 0.0318, "num_tokens": 113094934.0, "reward": 5.587935447692871e-09, "reward_std": 1.0526937246322632, "rewards/wordcountpos_reward/mean": 5.587935447692871e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10696364285863892, "rewards/wordcountpos_reward/raw_geo/std": 0.16237707788656083, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1142.625, "completions/mean_terminated_length": 1118.800048828125, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.5183036607321464, "frac_reward_zero_std": 0.0, "grad_norm": 3.1445694597006124, "kl": 0.012359619140625, "learning_rate": 6.000159844224693e-07, "loss": -0.0465, "num_tokens": 113136552.0, "reward": 0.0, "reward_std": 0.9964123964309692, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.053382001682934464, "rewards/wordcountpos_reward/raw_geo/std": 0.05147512374085077, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1331.9375, "completions/mean_terminated_length": 1307.9285888671875, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.5185037007401481, "frac_reward_zero_std": 0.0, "grad_norm": 2.4968062269508673, "kl": 0.015289306640625, "learning_rate": 5.997036901176025e-07, "loss": -0.0228, "num_tokens": 113189967.0, "reward": 0.0, "reward_std": 0.6176744699478149, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0567606560296404, "rewards/wordcountpos_reward/raw_geo/std": 0.11571534368274253, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1220.875, "completions/mean_terminated_length": 1202.2667236328125, "completions/min_length": 1072.0, "completions/min_terminated_length": 1072.0, "epoch": 0.5187037407481496, "frac_reward_zero_std": 0.0, "grad_norm": 3.1097932700871547, "kl": 0.0132904052734375, "learning_rate": 5.993913715769909e-07, "loss": -0.0234, "num_tokens": 113236349.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7051465511322021, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10812399808235772, "rewards/wordcountpos_reward/raw_geo/std": 0.21363531028540464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792516, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1099.3125, "completions/mean_terminated_length": 1072.60009765625, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.5189037807561512, "frac_reward_zero_std": 0.0, "grad_norm": 3.0188479804499746, "kl": 0.0146636962890625, "learning_rate": 5.99079028952922e-07, "loss": 0.0474, "num_tokens": 113287682.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7919710874557495, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020754819789827662, "rewards/wordcountpos_reward/raw_geo/std": 0.11053227521724618, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1235.4375, "completions/mean_terminated_length": 1197.6429443359375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.5191038207641528, "frac_reward_zero_std": 0.0, "grad_norm": 2.6606741312406763, "kl": 0.012176513671875, "learning_rate": 5.987666623976958e-07, "loss": -0.0346, "num_tokens": 113329345.0, "reward": 0.0, "reward_std": 0.5515407919883728, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.039149622850378415, "rewards/wordcountpos_reward/raw_geo/std": 0.2800266653065461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1041.0625, "completions/mean_terminated_length": 1041.0625, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.5193038607721544, "frac_reward_zero_std": 0.0, "grad_norm": 3.7158125722355737, "kl": 0.02337646484375, "learning_rate": 5.984542720636235e-07, "loss": -0.0797, "num_tokens": 113370826.0, "reward": 0.0, "reward_std": 0.7306674122810364, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.012814697890564334, "rewards/wordcountpos_reward/raw_geo/std": 0.1817675026261814, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1146.4375, "completions/mean_terminated_length": 1146.4375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.5195039007801561, "frac_reward_zero_std": 0.0, "grad_norm": 3.205133694744911, "kl": 0.018646240234375, "learning_rate": 5.981418581030283e-07, "loss": 0.0192, "num_tokens": 113404969.0, "reward": 0.0, "reward_std": 0.8245441317558289, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.149744119763735, "rewards/wordcountpos_reward/raw_geo/std": 0.08085860672601956, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1157.9375, "completions/mean_terminated_length": 1135.1334228515625, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.5197039407881576, "frac_reward_zero_std": 0.0, "grad_norm": 3.5457619762969923, "kl": 0.0234375, "learning_rate": 5.978294206682443e-07, "loss": -0.0398, "num_tokens": 113449880.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9996188879013062, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10326644230812068, "rewards/wordcountpos_reward/raw_geo/std": 0.07414040898145183, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1239.0, "completions/mean_terminated_length": 1239.0, "completions/min_length": 1097.0, "completions/min_terminated_length": 1097.0, "epoch": 0.5199039807961592, "frac_reward_zero_std": 0.0, "grad_norm": 2.7796547134307117, "kl": 0.01422119140625, "learning_rate": 5.975169599116177e-07, "loss": 0.0059, "num_tokens": 113499512.0, "reward": 0.0, "reward_std": 0.5233436822891235, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04906290104483072, "rewards/wordcountpos_reward/raw_geo/std": 0.1597005628323755, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1264.6875, "completions/mean_terminated_length": 1249.0001220703125, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.5201040208041608, "frac_reward_zero_std": 0.0, "grad_norm": 2.683851821589161, "kl": 0.0162811279296875, "learning_rate": 5.972044759855057e-07, "loss": -0.0009, "num_tokens": 113537939.0, "reward": 1.1175870895385742e-08, "reward_std": 0.9348875284194946, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06990732435823109, "rewards/wordcountpos_reward/raw_geo/std": 0.09837833352436291, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1011.5625, "completions/mean_terminated_length": 1011.5625, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.5203040608121624, "frac_reward_zero_std": 0.0, "grad_norm": 3.434949510162403, "kl": 0.01666259765625, "learning_rate": 5.968919690422768e-07, "loss": -0.0188, "num_tokens": 113574012.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8271220922470093, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.016032257514318703, "rewards/wordcountpos_reward/raw_geo/std": 0.10942396599704372, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13214750456578045, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 990.9375, "completions/mean_terminated_length": 873.4615478515625, "completions/min_length": 688.0, "completions/min_terminated_length": 688.0, "epoch": 0.5205041008201641, "frac_reward_zero_std": 0.0, "grad_norm": 3.172990952985458, "kl": 0.0148773193359375, "learning_rate": 5.965794392343109e-07, "loss": 0.0471, "num_tokens": 113617323.0, "reward": 0.0, "reward_std": 0.9950593709945679, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06767023724963676, "rewards/wordcountpos_reward/raw_geo/std": 0.05217467896681125, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1118.5625, "completions/mean_terminated_length": 1093.1334228515625, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.5207041408281656, "frac_reward_zero_std": 0.0, "grad_norm": 3.009841647787865, "kl": 0.015411376953125, "learning_rate": 5.962668867139989e-07, "loss": -0.0594, "num_tokens": 113663932.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9886813163757324, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020633888148965294, "rewards/wordcountpos_reward/raw_geo/std": 0.07061483056641024, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1266.3125, "completions/mean_terminated_length": 1232.9285888671875, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.5209041808361672, "frac_reward_zero_std": 0.0, "grad_norm": 2.501086009763606, "kl": 0.014617919921875, "learning_rate": 5.959543116337429e-07, "loss": 0.0219, "num_tokens": 113715249.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9951273798942566, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07354272470502374, "rewards/wordcountpos_reward/raw_geo/std": 0.09523128506080723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1140.1875, "completions/mean_terminated_length": 1140.1875, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.5211042208441689, "frac_reward_zero_std": 0.0, "grad_norm": 2.926835161584277, "kl": 0.019561767578125, "learning_rate": 5.956417141459556e-07, "loss": -0.0501, "num_tokens": 113764396.0, "reward": 0.0, "reward_std": 0.6780602931976318, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02659774449446003, "rewards/wordcountpos_reward/raw_geo/std": 0.06218619304036748, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1152.0625, "completions/mean_terminated_length": 1152.0625, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.5213042608521704, "frac_reward_zero_std": 0.0, "grad_norm": 3.042545496611556, "kl": 0.0143280029296875, "learning_rate": 5.953290944030617e-07, "loss": 0.0314, "num_tokens": 113806933.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9822260141372681, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04083470197410398, "rewards/wordcountpos_reward/raw_geo/std": 0.06779752502975316, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05947299418254506, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1035.1875, "completions/mean_terminated_length": 1035.1875, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.5215043008601721, "frac_reward_zero_std": 0.0, "grad_norm": 3.6886687058425975, "kl": 0.021209716796875, "learning_rate": 5.950164525574953e-07, "loss": -0.0015, "num_tokens": 113858104.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9577794075012207, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1124902600192222, "rewards/wordcountpos_reward/raw_geo/std": 0.09305492800073087, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503964, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1238.8125, "completions/mean_terminated_length": 1221.4000244140625, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.5217043408681736, "frac_reward_zero_std": 0.0, "grad_norm": 2.819984764043581, "kl": 0.02593994140625, "learning_rate": 5.94703788761702e-07, "loss": 0.0173, "num_tokens": 113905813.0, "reward": 2.2351741790771484e-08, "reward_std": 0.9493671655654907, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.014830315583485637, "rewards/wordcountpos_reward/raw_geo/std": 0.06094724023073102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1143.5, "completions/mean_terminated_length": 1119.7333984375, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.5219043808761752, "frac_reward_zero_std": 0.0, "grad_norm": 3.1345778327916336, "kl": 0.020538330078125, "learning_rate": 5.943911031681386e-07, "loss": -0.0537, "num_tokens": 113947669.0, "reward": -4.470348358154297e-08, "reward_std": 0.9810491800308228, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.22323509006745818, "rewards/wordcountpos_reward/raw_geo/std": 0.22002400787178394, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1364.3125, "completions/mean_terminated_length": 1138.166748046875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.5221044208841769, "frac_reward_zero_std": 0.0, "grad_norm": 3.207446418285774, "kl": 0.0189666748046875, "learning_rate": 5.940783959292715e-07, "loss": -0.0426, "num_tokens": 114001386.0, "reward": 0.0, "reward_std": 0.8414602279663086, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.605510477255987, "rewards/wordcountpos_reward/raw_geo/std": 0.27412345905988506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1109.75, "completions/mean_terminated_length": 1109.75, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.5223044608921784, "frac_reward_zero_std": 0.0, "grad_norm": 2.1095092898469803, "kl": 0.00815582275390625, "learning_rate": 5.937656671975786e-07, "loss": 0.0142, "num_tokens": 114040478.0, "reward": 0.0, "reward_std": 0.9403321743011475, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3144153687374331, "rewards/wordcountpos_reward/raw_geo/std": 0.3212246441564556, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1067.3125, "completions/mean_terminated_length": 1038.4666748046875, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.5225045009001801, "frac_reward_zero_std": 0.0, "grad_norm": 3.6158153032404257, "kl": 0.0183258056640625, "learning_rate": 5.934529171255474e-07, "loss": -0.0185, "num_tokens": 114080555.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5376211404800415, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15471448497901805, "rewards/wordcountpos_reward/raw_geo/std": 0.18772844564554217, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563383, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1139.625, "completions/mean_terminated_length": 1139.625, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.5227045409081816, "frac_reward_zero_std": 0.0, "grad_norm": 3.209942882451174, "kl": 0.01751708984375, "learning_rate": 5.931401458656767e-07, "loss": -0.0262, "num_tokens": 114130109.0, "reward": 0.0, "reward_std": 0.7312842011451721, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01052012650557896, "rewards/wordcountpos_reward/raw_geo/std": 0.21717551676514896, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1190.875, "completions/mean_terminated_length": 1190.875, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.5229045809161832, "frac_reward_zero_std": 0.0, "grad_norm": 2.466585479381237, "kl": 0.01312255859375, "learning_rate": 5.92827353570475e-07, "loss": 0.0061, "num_tokens": 114176067.0, "reward": 3.3527612686157227e-08, "reward_std": 1.0661890506744385, "rewards/wordcountpos_reward/mean": 3.3527612686157227e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0033609149297972596, "rewards/wordcountpos_reward/raw_geo/std": 0.1887944060790796, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1078.25, "completions/mean_terminated_length": 1078.25, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.5231046209241849, "frac_reward_zero_std": 0.0, "grad_norm": 3.4252004930706335, "kl": 0.0166168212890625, "learning_rate": 5.925145403924611e-07, "loss": 0.0184, "num_tokens": 114218567.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9461495876312256, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009820629019758885, "rewards/wordcountpos_reward/raw_geo/std": 0.04015828683289239, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1211.6875, "completions/mean_terminated_length": 1170.5, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.5233046609321864, "frac_reward_zero_std": 0.0, "grad_norm": 2.87763975853441, "kl": 0.0143280029296875, "learning_rate": 5.922017064841643e-07, "loss": -0.0393, "num_tokens": 114267922.0, "reward": 0.0, "reward_std": 0.7973343729972839, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02241973663316902, "rewards/wordcountpos_reward/raw_geo/std": 0.16539009335462254, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 1223.5, "completions/mean_terminated_length": 1097.8182373046875, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.5235047009401881, "frac_reward_zero_std": 0.0, "grad_norm": 3.065273883719721, "kl": 0.01904296875, "learning_rate": 5.918888519981239e-07, "loss": -0.0237, "num_tokens": 114301922.0, "reward": 7.450580596923828e-09, "reward_std": 1.0633296966552734, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.010808048292688466, "rewards/wordcountpos_reward/raw_geo/std": 0.1782341896113361, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1370.3125, "completions/mean_terminated_length": 1327.0833740234375, "completions/min_length": 1136.0, "completions/min_terminated_length": 1136.0, "epoch": 0.5237047409481896, "frac_reward_zero_std": 0.0, "grad_norm": 2.7497260635143648, "kl": 0.0166168212890625, "learning_rate": 5.915759770868889e-07, "loss": -0.0241, "num_tokens": 114358463.0, "reward": 0.0, "reward_std": 1.0216264724731445, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.049653392718927596, "rewards/wordcountpos_reward/raw_geo/std": 0.13358084363159764, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1266.1875, "completions/mean_terminated_length": 1232.7857666015625, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.5239047809561912, "frac_reward_zero_std": 0.0, "grad_norm": 3.0735264393646062, "kl": 0.0189361572265625, "learning_rate": 5.912630819030185e-07, "loss": -0.0082, "num_tokens": 114405842.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9412484169006348, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09082312046355617, "rewards/wordcountpos_reward/raw_geo/std": 0.1380878011469687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1162.1875, "completions/mean_terminated_length": 1049.5833740234375, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.5241048209641929, "frac_reward_zero_std": 0.0, "grad_norm": 2.4267535686674035, "kl": 0.0118865966796875, "learning_rate": 5.909501665990824e-07, "loss": -0.0018, "num_tokens": 114448469.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6956161260604858, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.5138222168296197, "rewards/wordcountpos_reward/raw_geo/std": 0.37175987271930283, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05000000000000001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1284.6875, "completions/mean_terminated_length": 1069.375, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.5243048609721944, "frac_reward_zero_std": 0.0, "grad_norm": 3.1772363833895905, "kl": 0.018310546875, "learning_rate": 5.906372313276589e-07, "loss": -0.04, "num_tokens": 114494056.0, "reward": -4.470348358154297e-08, "reward_std": 1.0644896030426025, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1599673010769883, "rewards/wordcountpos_reward/raw_geo/std": 0.10901834009498342, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1086.9375, "completions/mean_terminated_length": 1086.9375, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.5245049009801961, "frac_reward_zero_std": 0.0, "grad_norm": 2.960038954050213, "kl": 0.021087646484375, "learning_rate": 5.903242762413369e-07, "loss": 0.0142, "num_tokens": 114538983.0, "reward": 0.0, "reward_std": 0.9215567111968994, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07682865419149043, "rewards/wordcountpos_reward/raw_geo/std": 0.04342322206862802, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1122.875, "completions/mean_terminated_length": 1122.875, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.5247049409881976, "frac_reward_zero_std": 0.0, "grad_norm": 3.1678159000906634, "kl": 0.014801025390625, "learning_rate": 5.900113014927147e-07, "loss": -0.0012, "num_tokens": 114579565.0, "reward": 0.0, "reward_std": 0.45299214124679565, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14302214492132756, "rewards/wordcountpos_reward/raw_geo/std": 0.14464363308245334, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1347.375, "completions/mean_terminated_length": 1296.5, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.5249049809961992, "frac_reward_zero_std": 0.0, "grad_norm": 3.0274858287811046, "kl": 0.016937255859375, "learning_rate": 5.896983072344002e-07, "loss": -0.0133, "num_tokens": 114623587.0, "reward": 0.0, "reward_std": 0.8259058594703674, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07908158719122745, "rewards/wordcountpos_reward/raw_geo/std": 0.13992244490578654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1061.75, "completions/mean_terminated_length": 1061.75, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.5251050210042009, "frac_reward_zero_std": 0.0, "grad_norm": 3.303952791107004, "kl": 0.017822265625, "learning_rate": 5.893852936190108e-07, "loss": -0.0199, "num_tokens": 114672951.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0615277290344238, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09121283864736948, "rewards/wordcountpos_reward/raw_geo/std": 0.04819746537291619, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1148.0, "completions/mean_length": 1228.875, "completions/mean_terminated_length": 957.75, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.5253050610122024, "frac_reward_zero_std": 0.0, "grad_norm": 3.2138330117655505, "kl": 0.017242431640625, "learning_rate": 5.890722607991734e-07, "loss": 0.0513, "num_tokens": 114716117.0, "reward": 0.0, "reward_std": 0.9610219597816467, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07113523916452456, "rewards/wordcountpos_reward/raw_geo/std": 0.11086661680654372, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1415.6875, "completions/mean_terminated_length": 1331.375, "completions/min_length": 1116.0, "completions/min_terminated_length": 1116.0, "epoch": 0.5255051010202041, "frac_reward_zero_std": 0.0, "grad_norm": 3.049324164094943, "kl": 0.018035888671875, "learning_rate": 5.887592089275242e-07, "loss": 0.012, "num_tokens": 114778136.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6533002853393555, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14964260636063423, "rewards/wordcountpos_reward/raw_geo/std": 0.15600549704502784, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1140.0625, "completions/mean_terminated_length": 1116.0667724609375, "completions/min_length": 796.0, "completions/min_terminated_length": 796.0, "epoch": 0.5257051410282056, "frac_reward_zero_std": 0.0, "grad_norm": 3.2873118642076338, "kl": 0.017333984375, "learning_rate": 5.884461381567088e-07, "loss": 0.002, "num_tokens": 114814441.0, "reward": 7.450580596923828e-09, "reward_std": 1.045774221420288, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08322096520911926, "rewards/wordcountpos_reward/raw_geo/std": 0.133123724018097, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1175.625, "completions/mean_terminated_length": 1175.625, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.5259051810362072, "frac_reward_zero_std": 0.0, "grad_norm": 3.1046483711068404, "kl": 0.01947021484375, "learning_rate": 5.881330486393818e-07, "loss": -0.0055, "num_tokens": 114862283.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0116711854934692, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07166332237203164, "rewards/wordcountpos_reward/raw_geo/std": 0.05645070060607335, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 1080.3125, "completions/mean_terminated_length": 1080.3125, "completions/min_length": 619.0, "completions/min_terminated_length": 619.0, "epoch": 0.5261052210442089, "frac_reward_zero_std": 0.0, "grad_norm": 3.5906449763325625, "kl": 0.01995849609375, "learning_rate": 5.87819940528207e-07, "loss": 0.025, "num_tokens": 114909336.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7490963339805603, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.030832990854369614, "rewards/wordcountpos_reward/raw_geo/std": 0.14098038843119542, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.054262735320332364, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1197.0, "completions/max_terminated_length": 1197.0, "completions/mean_length": 963.375, "completions/mean_terminated_length": 963.375, "completions/min_length": 636.0, "completions/min_terminated_length": 636.0, "epoch": 0.5263052610522104, "frac_reward_zero_std": 0.0, "grad_norm": 3.8852931674254862, "kl": 0.0225830078125, "learning_rate": 5.875068139758577e-07, "loss": -0.001, "num_tokens": 114958510.0, "reward": 0.0, "reward_std": 0.9499572515487671, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07331020509248734, "rewards/wordcountpos_reward/raw_geo/std": 0.2582321386384711, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1191.125, "completions/mean_terminated_length": 1119.84619140625, "completions/min_length": 563.0, "completions/min_terminated_length": 563.0, "epoch": 0.5265053010602121, "frac_reward_zero_std": 0.0, "grad_norm": 2.6295110044913446, "kl": 0.014984130859375, "learning_rate": 5.871936691350154e-07, "loss": -0.0562, "num_tokens": 115013248.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0305094718933105, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12140769539520346, "rewards/wordcountpos_reward/raw_geo/std": 0.08575802540761171, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.131021626713557, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1230.875, "completions/mean_terminated_length": 1230.875, "completions/min_length": 1067.0, "completions/min_terminated_length": 1067.0, "epoch": 0.5267053410682137, "frac_reward_zero_std": 0.0, "grad_norm": 3.2054639305327557, "kl": 0.019287109375, "learning_rate": 5.868805061583713e-07, "loss": 0.007, "num_tokens": 115061118.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9792601466178894, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.005599101455699463, "rewards/wordcountpos_reward/raw_geo/std": 0.010480418208589947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512346, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1352.75, "completions/mean_terminated_length": 1264.4000244140625, "completions/min_length": 1134.0, "completions/min_terminated_length": 1134.0, "epoch": 0.5269053810762152, "frac_reward_zero_std": 0.0, "grad_norm": 2.4375377001462315, "kl": 0.0114593505859375, "learning_rate": 5.865673251986248e-07, "loss": -0.0032, "num_tokens": 115106010.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8498144149780273, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1104025531324863, "rewards/wordcountpos_reward/raw_geo/std": 0.11800037750193905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1333.4375, "completions/mean_terminated_length": 1257.727294921875, "completions/min_length": 1110.0, "completions/min_terminated_length": 1110.0, "epoch": 0.5271054210842169, "frac_reward_zero_std": 0.0, "grad_norm": 3.5680258264295004, "kl": 0.018890380859375, "learning_rate": 5.862541264084846e-07, "loss": 0.0003, "num_tokens": 115154945.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9026488661766052, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11583175193107989, "rewards/wordcountpos_reward/raw_geo/std": 0.22249863732071837, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1164.9375, "completions/mean_terminated_length": 1117.071533203125, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.5273054610922184, "frac_reward_zero_std": 0.0, "grad_norm": 2.7859838945936857, "kl": 0.0133514404296875, "learning_rate": 5.859409099406677e-07, "loss": -0.003, "num_tokens": 115193712.0, "reward": 0.0, "reward_std": 0.9995220303535461, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10741977858600688, "rewards/wordcountpos_reward/raw_geo/std": 0.17177726965363183, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1343709624716425, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1170.9375, "completions/mean_terminated_length": 1149.0, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.5275055011002201, "frac_reward_zero_std": 0.0, "grad_norm": 3.4283176971751415, "kl": 0.019195556640625, "learning_rate": 5.856276759478998e-07, "loss": 0.0068, "num_tokens": 115235375.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9505083560943604, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.033191504745148426, "rewards/wordcountpos_reward/raw_geo/std": 0.1942294893976506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1246.625, "completions/mean_terminated_length": 1131.45458984375, "completions/min_length": 794.0, "completions/min_terminated_length": 794.0, "epoch": 0.5277055411082217, "frac_reward_zero_std": 0.0, "grad_norm": 3.16141987170564, "kl": 0.01422119140625, "learning_rate": 5.853144245829153e-07, "loss": -0.056, "num_tokens": 115291113.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0358937978744507, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15413616007563055, "rewards/wordcountpos_reward/raw_geo/std": 0.2712050897221045, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1454.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1282.625, "completions/mean_terminated_length": 1282.625, "completions/min_length": 1048.0, "completions/min_terminated_length": 1048.0, "epoch": 0.5279055811162232, "frac_reward_zero_std": 0.0, "grad_norm": 2.472386626968951, "kl": 0.0118255615234375, "learning_rate": 5.850011559984572e-07, "loss": 0.018, "num_tokens": 115335011.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9933051466941833, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1878998976485834, "rewards/wordcountpos_reward/raw_geo/std": 0.12447187236757576, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1095.875, "completions/mean_terminated_length": 1095.875, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.5281056211242249, "frac_reward_zero_std": 0.0, "grad_norm": 2.9652600556400386, "kl": 0.0123138427734375, "learning_rate": 5.846878703472764e-07, "loss": -0.0023, "num_tokens": 115384953.0, "reward": 0.0, "reward_std": 0.621292233467102, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07892206103286606, "rewards/wordcountpos_reward/raw_geo/std": 0.12114116920515415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921946, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 1052.625, "completions/mean_terminated_length": 1022.800048828125, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.5283056611322264, "frac_reward_zero_std": 0.0, "grad_norm": 3.5757406609605273, "kl": 0.018035888671875, "learning_rate": 5.843745677821325e-07, "loss": -0.003, "num_tokens": 115419707.0, "reward": 0.0, "reward_std": 0.7799457907676697, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1843830842774939, "rewards/wordcountpos_reward/raw_geo/std": 0.07931605178938245, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1405.5, "completions/mean_terminated_length": 1284.0, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.528505701140228, "frac_reward_zero_std": 0.0, "grad_norm": 2.800425359004418, "kl": 0.0134735107421875, "learning_rate": 5.840612484557933e-07, "loss": -0.0117, "num_tokens": 115471251.0, "reward": 0.0, "reward_std": 0.8388550281524658, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.022128517713603753, "rewards/wordcountpos_reward/raw_geo/std": 0.11465819353742752, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1165.6875, "completions/mean_terminated_length": 1088.5384521484375, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.5287057411482297, "frac_reward_zero_std": 0.0, "grad_norm": 2.4750196959912643, "kl": 0.0127716064453125, "learning_rate": 5.837479125210347e-07, "loss": 0.0563, "num_tokens": 115518774.0, "reward": 3.725290298461914e-08, "reward_std": 1.0674419403076172, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09350073372009995, "rewards/wordcountpos_reward/raw_geo/std": 0.13006097757264207, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 1138.3125, "completions/mean_terminated_length": 1114.2000732421875, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.5289057811562312, "frac_reward_zero_std": 0.0, "grad_norm": 3.439489777666523, "kl": 0.02105712890625, "learning_rate": 5.834345601306406e-07, "loss": -0.0101, "num_tokens": 115561843.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0192022323608398, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.040913205885752306, "rewards/wordcountpos_reward/raw_geo/std": 0.1396489969167037, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1199.25, "completions/mean_terminated_length": 1129.84619140625, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.5291058211642329, "frac_reward_zero_std": 0.0, "grad_norm": 3.1966573952091455, "kl": 0.0224609375, "learning_rate": 5.831211914374032e-07, "loss": 0.0061, "num_tokens": 115598951.0, "reward": 5.960464477539063e-08, "reward_std": 0.8583661913871765, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11173611621237559, "rewards/wordcountpos_reward/raw_geo/std": 0.11696371430465986, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1148.125, "completions/mean_terminated_length": 1148.125, "completions/min_length": 497.0, "completions/min_terminated_length": 497.0, "epoch": 0.5293058611722344, "frac_reward_zero_std": 0.0, "grad_norm": 3.161403300950804, "kl": 0.0180816650390625, "learning_rate": 5.828078065941225e-07, "loss": -0.0145, "num_tokens": 115653745.0, "reward": 0.0, "reward_std": 0.9942439794540405, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.025371873449593893, "rewards/wordcountpos_reward/raw_geo/std": 0.05885473864177464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1154.375, "completions/mean_terminated_length": 1131.3333740234375, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.529505901180236, "frac_reward_zero_std": 0.0, "grad_norm": 3.2739046638939135, "kl": 0.019989013671875, "learning_rate": 5.824944057536063e-07, "loss": 0.0474, "num_tokens": 115699551.0, "reward": -2.9802322387695312e-08, "reward_std": 1.01275634765625, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.26101064583207695, "rewards/wordcountpos_reward/raw_geo/std": 0.1338948128367282, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1202.0, "completions/mean_terminated_length": 1182.1334228515625, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.5297059411882377, "frac_reward_zero_std": 0.0, "grad_norm": 2.585668441924804, "kl": 0.0142974853515625, "learning_rate": 5.821809890686703e-07, "loss": 0.0093, "num_tokens": 115748375.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9600592851638794, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1398407494164169, "rewards/wordcountpos_reward/raw_geo/std": 0.2127451548002688, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 1025.0625, "completions/mean_terminated_length": 1025.0625, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.5299059811962392, "frac_reward_zero_std": 0.0, "grad_norm": 3.2990853960446986, "kl": 0.02081298828125, "learning_rate": 5.818675566921381e-07, "loss": -0.0302, "num_tokens": 115800760.0, "reward": -7.450580596923828e-09, "reward_std": 1.0653877258300781, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16654424032173462, "rewards/wordcountpos_reward/raw_geo/std": 0.08046078754796773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1049.5625, "completions/mean_terminated_length": 1049.5625, "completions/min_length": 685.0, "completions/min_terminated_length": 685.0, "epoch": 0.5301060212042409, "frac_reward_zero_std": 0.0, "grad_norm": 3.4681550701355928, "kl": 0.0211181640625, "learning_rate": 5.815541087768402e-07, "loss": 0.0126, "num_tokens": 115841049.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0074832439422607, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016201305177321032, "rewards/wordcountpos_reward/raw_geo/std": 0.06431336522553675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1348.6875, "completions/mean_terminated_length": 1313.769287109375, "completions/min_length": 1046.0, "completions/min_terminated_length": 1046.0, "epoch": 0.5303060612122424, "frac_reward_zero_std": 0.0, "grad_norm": 2.8808822391777067, "kl": 0.01629638671875, "learning_rate": 5.812406454756158e-07, "loss": 0.0167, "num_tokens": 115886636.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9139087200164795, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13222862057901671, "rewards/wordcountpos_reward/raw_geo/std": 0.06820374516344276, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.12583057392117916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1164.875, "completions/mean_terminated_length": 1164.875, "completions/min_length": 1017.0, "completions/min_terminated_length": 1017.0, "epoch": 0.530506101220244, "frac_reward_zero_std": 0.0, "grad_norm": 1.5526103508657887, "kl": 0.00807952880859375, "learning_rate": 5.809271669413106e-07, "loss": 0.0173, "num_tokens": 115937802.0, "reward": 0.0, "reward_std": 1.0404446125030518, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03042841665969094, "rewards/wordcountpos_reward/raw_geo/std": 0.06649261473105766, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460887, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1227.0, "completions/max_terminated_length": 1227.0, "completions/mean_length": 1082.0, "completions/mean_terminated_length": 1082.0, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.5307061412282457, "frac_reward_zero_std": 0.0, "grad_norm": 3.643976977737567, "kl": 0.01715087890625, "learning_rate": 5.806136733267787e-07, "loss": 0.023, "num_tokens": 115987578.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9127200841903687, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1558576485506737, "rewards/wordcountpos_reward/raw_geo/std": 0.2445771098791036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.13305526559931294, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1269.25, "completions/mean_terminated_length": 1130.800048828125, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.5309061812362472, "frac_reward_zero_std": 0.0, "grad_norm": 2.7356680608085147, "kl": 0.0149078369140625, "learning_rate": 5.803001647848805e-07, "loss": 0.006, "num_tokens": 116043854.0, "reward": 0.0, "reward_std": 0.8800334930419922, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19586131027576403, "rewards/wordcountpos_reward/raw_geo/std": 0.1607462654173008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1077.4375, "completions/mean_terminated_length": 1077.4375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.5311062212442489, "frac_reward_zero_std": 0.0, "grad_norm": 2.8245462659610983, "kl": 0.0135955810546875, "learning_rate": 5.799866414684842e-07, "loss": 0.0151, "num_tokens": 116082237.0, "reward": -7.450580596923828e-09, "reward_std": 1.0686872005462646, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.010289688746558941, "rewards/wordcountpos_reward/raw_geo/std": 0.0872800230061202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1072.5625, "completions/mean_terminated_length": 1072.5625, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.5313062612522504, "frac_reward_zero_std": 0.0, "grad_norm": 3.6931032396658807, "kl": 0.022064208984375, "learning_rate": 5.796731035304654e-07, "loss": 0.0255, "num_tokens": 116125406.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5724748373031616, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0715425298565079, "rewards/wordcountpos_reward/raw_geo/std": 0.045581224520042055, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 1095.8125, "completions/mean_terminated_length": 1095.8125, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.531506301260252, "frac_reward_zero_std": 0.0, "grad_norm": 2.6061206641734396, "kl": 0.0142974853515625, "learning_rate": 5.793595511237067e-07, "loss": -0.0624, "num_tokens": 116176931.0, "reward": 7.450580596923828e-09, "reward_std": 0.9638686180114746, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.16142435031738825, "rewards/wordcountpos_reward/raw_geo/std": 0.05527587756060644, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 917.75, "completions/mean_terminated_length": 917.75, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.5317063412682537, "frac_reward_zero_std": 0.0, "grad_norm": 3.2915601489541033, "kl": 0.01824951171875, "learning_rate": 5.790459844010976e-07, "loss": 0.0198, "num_tokens": 116210311.0, "reward": 0.0, "reward_std": 0.9147825241088867, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12675425149939643, "rewards/wordcountpos_reward/raw_geo/std": 0.12380906576469891, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1083.375, "completions/mean_terminated_length": 1083.375, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.5319063812762552, "frac_reward_zero_std": 0.0, "grad_norm": 2.640771987759865, "kl": 0.018280029296875, "learning_rate": 5.787324035155344e-07, "loss": -0.0244, "num_tokens": 116244741.0, "reward": 0.0, "reward_std": 1.0585709810256958, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03733838249236895, "rewards/wordcountpos_reward/raw_geo/std": 0.1105069224282938, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1303.25, "completions/mean_terminated_length": 1185.2000732421875, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.5321064212842569, "frac_reward_zero_std": 0.0, "grad_norm": 2.007710255196761, "kl": 0.0100555419921875, "learning_rate": 5.784188086199209e-07, "loss": -0.019, "num_tokens": 116291977.0, "reward": -7.450580596923828e-09, "reward_std": 1.033731460571289, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.17247674481419184, "rewards/wordcountpos_reward/raw_geo/std": 0.157538089291856, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 947.4375, "completions/mean_terminated_length": 947.4375, "completions/min_length": 594.0, "completions/min_terminated_length": 594.0, "epoch": 0.5323064612922584, "frac_reward_zero_std": 0.0, "grad_norm": 3.378209696213499, "kl": 0.0158538818359375, "learning_rate": 5.781051998671674e-07, "loss": -0.0206, "num_tokens": 116327472.0, "reward": 0.0, "reward_std": 0.809309720993042, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07081953820196128, "rewards/wordcountpos_reward/raw_geo/std": 0.07028662713710912, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1282.5, "completions/mean_terminated_length": 1268.0001220703125, "completions/min_length": 1014.0, "completions/min_terminated_length": 1014.0, "epoch": 0.53250650130026, "frac_reward_zero_std": 0.0, "grad_norm": 3.0152921047277768, "kl": 0.0175323486328125, "learning_rate": 5.777915774101907e-07, "loss": -0.0481, "num_tokens": 116381568.0, "reward": -2.9802322387695312e-08, "reward_std": 0.832829475402832, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24259543021005722, "rewards/wordcountpos_reward/raw_geo/std": 0.25291185561127516, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1057600358603626, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1146.9375, "completions/mean_terminated_length": 1123.4000244140625, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.5327065413082617, "frac_reward_zero_std": 0.0, "grad_norm": 3.437736372499837, "kl": 0.019012451171875, "learning_rate": 5.774779414019145e-07, "loss": -0.0623, "num_tokens": 116419303.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7554380893707275, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11773699230279955, "rewards/wordcountpos_reward/raw_geo/std": 0.1360232291113219, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1232.9375, "completions/mean_terminated_length": 1171.3077392578125, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.5329065813162632, "frac_reward_zero_std": 0.0, "grad_norm": 3.1611656343161543, "kl": 0.016754150390625, "learning_rate": 5.771642919952696e-07, "loss": -0.0628, "num_tokens": 116469206.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8111255764961243, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09074367344278304, "rewards/wordcountpos_reward/raw_geo/std": 0.3273465001400447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13443985299781488, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1295.0625, "completions/mean_terminated_length": 1281.4000244140625, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.5331066213242649, "frac_reward_zero_std": 0.0, "grad_norm": 3.2564236538496525, "kl": 0.020263671875, "learning_rate": 5.768506293431923e-07, "loss": -0.0057, "num_tokens": 116518487.0, "reward": 0.0, "reward_std": 0.6319928765296936, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16102801500944608, "rewards/wordcountpos_reward/raw_geo/std": 0.1341694557326364, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13381856152046848, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1203.4375, "completions/mean_terminated_length": 1183.666748046875, "completions/min_length": 1006.0, "completions/min_terminated_length": 1006.0, "epoch": 0.5333066613322665, "frac_reward_zero_std": 0.0, "grad_norm": 3.0458123615884363, "kl": 0.0154876708984375, "learning_rate": 5.765369535986264e-07, "loss": -0.0035, "num_tokens": 116569446.0, "reward": 0.0, "reward_std": 0.8763201236724854, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2848726600153308, "rewards/wordcountpos_reward/raw_geo/std": 0.08348811703983518, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1276569477008451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1016.75, "completions/mean_terminated_length": 1016.75, "completions/min_length": 806.0, "completions/min_terminated_length": 806.0, "epoch": 0.533506701340268, "frac_reward_zero_std": 0.0, "grad_norm": 3.6335304294626636, "kl": 0.020294189453125, "learning_rate": 5.762232649145212e-07, "loss": -0.0112, "num_tokens": 116619642.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9646036028862, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06311981140958234, "rewards/wordcountpos_reward/raw_geo/std": 0.14875494988314422, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1339.8125, "completions/mean_terminated_length": 1316.9285888671875, "completions/min_length": 1188.0, "completions/min_terminated_length": 1188.0, "epoch": 0.5337067413482697, "frac_reward_zero_std": 0.0, "grad_norm": 2.656992262625212, "kl": 0.012359619140625, "learning_rate": 5.759095634438331e-07, "loss": -0.017, "num_tokens": 116666119.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9374561309814453, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.003630258709219324, "rewards/wordcountpos_reward/raw_geo/std": 0.0721890672006, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0825518916489187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1027.0, "completions/max_terminated_length": 1027.0, "completions/mean_length": 824.0625, "completions/mean_terminated_length": 824.0625, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.5339067813562712, "frac_reward_zero_std": 0.0, "grad_norm": 2.9191106278001735, "kl": 0.01128387451171875, "learning_rate": 5.755958493395243e-07, "loss": -0.0187, "num_tokens": 116700584.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8860800266265869, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16601785818130907, "rewards/wordcountpos_reward/raw_geo/std": 0.2762382457839967, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1465.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1164.0625, "completions/mean_terminated_length": 1164.0625, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.5341068213642729, "frac_reward_zero_std": 0.0, "grad_norm": 3.0543434457765004, "kl": 0.0168609619140625, "learning_rate": 5.75282122754563e-07, "loss": 0.0051, "num_tokens": 116738537.0, "reward": -2.9802322387695312e-08, "reward_std": 1.027980923652649, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1833545898106329, "rewards/wordcountpos_reward/raw_geo/std": 0.06938558692130349, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1330.6875, "completions/mean_terminated_length": 1291.615478515625, "completions/min_length": 1114.0, "completions/min_terminated_length": 1114.0, "epoch": 0.5343068613722745, "frac_reward_zero_std": 0.0, "grad_norm": 2.3234047192923355, "kl": 0.0143585205078125, "learning_rate": 5.749683838419239e-07, "loss": -0.0426, "num_tokens": 116783700.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7011748552322388, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08086667467379957, "rewards/wordcountpos_reward/raw_geo/std": 0.0857848390993117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1125.125, "completions/mean_terminated_length": 900.2000122070312, "completions/min_length": 639.0, "completions/min_terminated_length": 639.0, "epoch": 0.534506901380276, "frac_reward_zero_std": 0.0, "grad_norm": 3.1539324407478615, "kl": 0.0199737548828125, "learning_rate": 5.746546327545874e-07, "loss": -0.0148, "num_tokens": 116824254.0, "reward": -4.470348358154297e-08, "reward_std": 0.9478144645690918, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03618729371519117, "rewards/wordcountpos_reward/raw_geo/std": 0.18209391402918362, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1140.5, "completions/mean_terminated_length": 1140.5, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.5347069413882777, "frac_reward_zero_std": 0.0, "grad_norm": 3.0959911855706834, "kl": 0.0150604248046875, "learning_rate": 5.743408696455402e-07, "loss": -0.0056, "num_tokens": 116864462.0, "reward": -2.9802322387695312e-08, "reward_std": 0.38974490761756897, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.040352975058039836, "rewards/wordcountpos_reward/raw_geo/std": 0.13631681924584285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1136.125, "completions/mean_terminated_length": 1136.125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.5349069813962792, "frac_reward_zero_std": 0.0, "grad_norm": 3.460732920348063, "kl": 0.02618408203125, "learning_rate": 5.740270946677742e-07, "loss": 0.021, "num_tokens": 116918664.0, "reward": 0.0, "reward_std": 0.9318538904190063, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1180444690293677, "rewards/wordcountpos_reward/raw_geo/std": 0.07815724593947285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1137.5, "completions/mean_terminated_length": 1085.71435546875, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.5351070214042809, "frac_reward_zero_std": 0.0, "grad_norm": 2.76275325857321, "kl": 0.015869140625, "learning_rate": 5.737133079742879e-07, "loss": -0.0376, "num_tokens": 116957368.0, "reward": -5.960464477539063e-08, "reward_std": 0.8619743585586548, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17265636250935865, "rewards/wordcountpos_reward/raw_geo/std": 0.2670402082506081, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1213.5625, "completions/mean_terminated_length": 1172.6429443359375, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.5353070614122825, "frac_reward_zero_std": 0.0, "grad_norm": 3.2491159476746683, "kl": 0.0185546875, "learning_rate": 5.733995097180847e-07, "loss": -0.0433, "num_tokens": 117009041.0, "reward": 0.0, "reward_std": 0.9703395366668701, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13509627217160572, "rewards/wordcountpos_reward/raw_geo/std": 0.08533182241524107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1476.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1272.875, "completions/mean_terminated_length": 1272.875, "completions/min_length": 1099.0, "completions/min_terminated_length": 1099.0, "epoch": 0.535507101420284, "frac_reward_zero_std": 0.0, "grad_norm": 2.10346529484213, "kl": 0.01214599609375, "learning_rate": 5.730857000521746e-07, "loss": -0.0056, "num_tokens": 117054519.0, "reward": 0.0, "reward_std": 0.5297501683235168, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09865313676441759, "rewards/wordcountpos_reward/raw_geo/std": 0.06726118715657903, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1074.4375, "completions/mean_terminated_length": 1013.6428833007812, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.5357071414282857, "frac_reward_zero_std": 0.0, "grad_norm": 3.483158992713572, "kl": 0.017333984375, "learning_rate": 5.727718791295721e-07, "loss": 0.0095, "num_tokens": 117108350.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6920267939567566, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11216628281611463, "rewards/wordcountpos_reward/raw_geo/std": 0.14007585311779874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0596284793999944, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1143.0, "completions/max_terminated_length": 1143.0, "completions/mean_length": 927.3125, "completions/mean_terminated_length": 927.3125, "completions/min_length": 528.0, "completions/min_terminated_length": 528.0, "epoch": 0.5359071814362872, "frac_reward_zero_std": 0.0, "grad_norm": 2.93889386615737, "kl": 0.0142364501953125, "learning_rate": 5.72458047103298e-07, "loss": -0.0299, "num_tokens": 117149731.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9915274381637573, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07504079636290863, "rewards/wordcountpos_reward/raw_geo/std": 0.029833068746483324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1140987226857449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1286.3125, "completions/mean_terminated_length": 1158.0999755859375, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.5361072214442889, "frac_reward_zero_std": 0.0, "grad_norm": 3.468867526254976, "kl": 0.018035888671875, "learning_rate": 5.721442041263783e-07, "loss": -0.0094, "num_tokens": 117203720.0, "reward": 7.450580596923828e-09, "reward_std": 1.0414239168167114, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.19994760817472107, "rewards/wordcountpos_reward/raw_geo/std": 0.4561678726015729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1224.25, "completions/mean_terminated_length": 1184.857177734375, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.5363072614522905, "frac_reward_zero_std": 0.0, "grad_norm": 3.1552086637898102, "kl": 0.017608642578125, "learning_rate": 5.718303503518439e-07, "loss": -0.0042, "num_tokens": 117256052.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0652034282684326, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10300368208971332, "rewards/wordcountpos_reward/raw_geo/std": 0.05416546347347958, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.054433105395181744, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1034.0, "completions/mean_terminated_length": 1034.0, "completions/min_length": 719.0, "completions/min_terminated_length": 719.0, "epoch": 0.536507301460292, "frac_reward_zero_std": 0.0, "grad_norm": 3.541646783639178, "kl": 0.0169830322265625, "learning_rate": 5.715164859327318e-07, "loss": -0.0155, "num_tokens": 117296140.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9717391133308411, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03480402168613437, "rewards/wordcountpos_reward/raw_geo/std": 0.21876339560750968, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1218.8125, "completions/mean_terminated_length": 1218.8125, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.5367073414682937, "frac_reward_zero_std": 0.0, "grad_norm": 2.5228493942458927, "kl": 0.0127105712890625, "learning_rate": 5.712026110220832e-07, "loss": 0.0064, "num_tokens": 117347265.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0477489233016968, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07499830086877833, "rewards/wordcountpos_reward/raw_geo/std": 0.1319823815774806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1268.75, "completions/mean_terminated_length": 1215.3846435546875, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.5369073814762952, "frac_reward_zero_std": 0.0, "grad_norm": 3.4584114796449206, "kl": 0.01959228515625, "learning_rate": 5.708887257729453e-07, "loss": 0.0175, "num_tokens": 117393869.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8136645555496216, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11843614599535406, "rewards/wordcountpos_reward/raw_geo/std": 0.11788839011808551, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1103.9375, "completions/mean_terminated_length": 1103.9375, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.5371074214842969, "frac_reward_zero_std": 0.0, "grad_norm": 3.147249283607828, "kl": 0.0175933837890625, "learning_rate": 5.705748303383698e-07, "loss": -0.045, "num_tokens": 117443980.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9166231155395508, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0007084624034166959, "rewards/wordcountpos_reward/raw_geo/std": 0.15811944959167623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457552, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1160.0, "completions/mean_terminated_length": 1160.0, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.5373074614922985, "frac_reward_zero_std": 0.0, "grad_norm": 2.9144589674359382, "kl": 0.010528564453125, "learning_rate": 5.702609248714136e-07, "loss": 0.0207, "num_tokens": 117491268.0, "reward": 0.0, "reward_std": 0.7417783737182617, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09154705765565159, "rewards/wordcountpos_reward/raw_geo/std": 0.2146451014273662, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1189.3125, "completions/mean_terminated_length": 1144.9285888671875, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.5375075015003, "frac_reward_zero_std": 0.0, "grad_norm": 3.1038154348115357, "kl": 0.0138092041015625, "learning_rate": 5.699470095251385e-07, "loss": -0.035, "num_tokens": 117530241.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9138014912605286, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05534530683883563, "rewards/wordcountpos_reward/raw_geo/std": 0.19528662451249523, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10809803506625448, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 983.0, "completions/mean_terminated_length": 983.0, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.5377075415083017, "frac_reward_zero_std": 0.0, "grad_norm": 3.304196954792557, "kl": 0.01458740234375, "learning_rate": 5.69633084452611e-07, "loss": -0.0099, "num_tokens": 117569145.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8686536550521851, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14349846028306426, "rewards/wordcountpos_reward/raw_geo/std": 0.18393287167919878, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.725, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1322.9375, "completions/mean_terminated_length": 1297.6429443359375, "completions/min_length": 1093.0, "completions/min_terminated_length": 1093.0, "epoch": 0.5379075815163032, "frac_reward_zero_std": 0.0, "grad_norm": 1.9392975835679573, "kl": 0.008434295654296875, "learning_rate": 5.693191498069023e-07, "loss": -0.0219, "num_tokens": 117616968.0, "reward": -3.725290298461914e-09, "reward_std": 1.0432195663452148, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.13200748330594922, "rewards/wordcountpos_reward/raw_geo/std": 0.1032787348765414, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1092.25, "completions/mean_terminated_length": 1092.25, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.5381076215243049, "frac_reward_zero_std": 0.0, "grad_norm": 3.4484593384229023, "kl": 0.019622802734375, "learning_rate": 5.690052057410885e-07, "loss": -0.0689, "num_tokens": 117667140.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5399351119995117, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03203308603165858, "rewards/wordcountpos_reward/raw_geo/std": 0.16505813641412287, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1169.3125, "completions/mean_terminated_length": 1169.3125, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.5383076615323065, "frac_reward_zero_std": 0.0, "grad_norm": 2.645204662377473, "kl": 0.0146942138671875, "learning_rate": 5.6869125240825e-07, "loss": -0.0453, "num_tokens": 117711609.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0459929704666138, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12883296612457026, "rewards/wordcountpos_reward/raw_geo/std": 0.07578774366324507, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1285.0, "completions/mean_terminated_length": 1270.666748046875, "completions/min_length": 1161.0, "completions/min_terminated_length": 1161.0, "epoch": 0.538507701540308, "frac_reward_zero_std": 0.0, "grad_norm": 3.0361997499650224, "kl": 0.017608642578125, "learning_rate": 5.683772899614719e-07, "loss": -0.0171, "num_tokens": 117751641.0, "reward": -2.9802322387695312e-08, "reward_std": 0.710211992263794, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01230124472429855, "rewards/wordcountpos_reward/raw_geo/std": 0.13224145692565265, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1356.3125, "completions/mean_terminated_length": 1270.0999755859375, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.5387077415483097, "frac_reward_zero_std": 0.0, "grad_norm": 2.644788660185087, "kl": 0.0136566162109375, "learning_rate": 5.68063318553844e-07, "loss": 0.0075, "num_tokens": 117800462.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9373717904090881, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2892333732519394, "rewards/wordcountpos_reward/raw_geo/std": 0.2915571450205382, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1321.9375, "completions/mean_terminated_length": 1183.4444580078125, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.5389077815563112, "frac_reward_zero_std": 0.0, "grad_norm": 2.263410394066094, "kl": 0.0112762451171875, "learning_rate": 5.677493383384598e-07, "loss": 0.0143, "num_tokens": 117847661.0, "reward": 0.0, "reward_std": 0.648388147354126, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10213418953805993, "rewards/wordcountpos_reward/raw_geo/std": 0.221744349176967, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460883, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 1003.5, "completions/mean_terminated_length": 1003.5, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.5391078215643129, "frac_reward_zero_std": 0.0, "grad_norm": 3.6044166217553437, "kl": 0.01861572265625, "learning_rate": 5.674353494684176e-07, "loss": -0.0083, "num_tokens": 117893005.0, "reward": 0.0, "reward_std": 1.0628652572631836, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17437463777685838, "rewards/wordcountpos_reward/raw_geo/std": 0.06515709357401875, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1239.125, "completions/mean_terminated_length": 1201.857177734375, "completions/min_length": 1014.0, "completions/min_terminated_length": 1014.0, "epoch": 0.5393078615723145, "frac_reward_zero_std": 0.0, "grad_norm": 2.9201290587187936, "kl": 0.016204833984375, "learning_rate": 5.671213520968198e-07, "loss": -0.024, "num_tokens": 117946471.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6075248122215271, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08129051363418009, "rewards/wordcountpos_reward/raw_geo/std": 0.10342811055268296, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1142.4375, "completions/mean_terminated_length": 1118.60009765625, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.539507901580316, "frac_reward_zero_std": 0.0, "grad_norm": 3.261907001229382, "kl": 0.018035888671875, "learning_rate": 5.66807346376773e-07, "loss": 0.0049, "num_tokens": 117995126.0, "reward": 0.0, "reward_std": 0.9306865334510803, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.37188788343706414, "rewards/wordcountpos_reward/raw_geo/std": 0.15912096715654836, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1196.125, "completions/mean_terminated_length": 1175.86669921875, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.5397079415883177, "frac_reward_zero_std": 0.0, "grad_norm": 2.959099695301735, "kl": 0.014984130859375, "learning_rate": 5.664933324613879e-07, "loss": 0.0023, "num_tokens": 118034016.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6076076030731201, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.038544714998714855, "rewards/wordcountpos_reward/raw_geo/std": 0.051646705564295875, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.20348810139024692, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1225.125, "completions/mean_terminated_length": 1225.125, "completions/min_length": 1133.0, "completions/min_terminated_length": 1133.0, "epoch": 0.5399079815963193, "frac_reward_zero_std": 0.0, "grad_norm": 2.008289564533446, "kl": 0.0096588134765625, "learning_rate": 5.661793105037789e-07, "loss": 0.01, "num_tokens": 118070418.0, "reward": 7.450580596923828e-09, "reward_std": 0.97540283203125, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.06726017901950569, "rewards/wordcountpos_reward/raw_geo/std": 0.05916886020532251, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1260.875, "completions/mean_terminated_length": 1226.71435546875, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.5401080216043208, "frac_reward_zero_std": 0.0, "grad_norm": 2.949602316939215, "kl": 0.01568603515625, "learning_rate": 5.658652806570646e-07, "loss": 0.0052, "num_tokens": 118122304.0, "reward": -7.450580596923828e-09, "reward_std": 1.0296714305877686, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0922875984111438, "rewards/wordcountpos_reward/raw_geo/std": 0.11935253841799788, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 1045.3125, "completions/mean_terminated_length": 1045.3125, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.5403080616123225, "frac_reward_zero_std": 0.0, "grad_norm": 3.6541877559387195, "kl": 0.020050048828125, "learning_rate": 5.655512430743674e-07, "loss": 0.0005, "num_tokens": 118160093.0, "reward": 0.0, "reward_std": 0.7950319647789001, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18805175234698712, "rewards/wordcountpos_reward/raw_geo/std": 0.14873647768308026, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1164.6875, "completions/mean_terminated_length": 1164.6875, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.540508101620324, "frac_reward_zero_std": 0.0, "grad_norm": 1.8385573485986906, "kl": 0.00762939453125, "learning_rate": 5.652371979088136e-07, "loss": -0.011, "num_tokens": 118203520.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4853672683238983, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10439913437151771, "rewards/wordcountpos_reward/raw_geo/std": 0.19927555602723787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1263.5, "completions/mean_terminated_length": 1208.923095703125, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.5407081416283257, "frac_reward_zero_std": 0.0, "grad_norm": 3.4966670717735115, "kl": 0.02093505859375, "learning_rate": 5.649231453135327e-07, "loss": 0.0132, "num_tokens": 118243672.0, "reward": 4.0978193283081055e-08, "reward_std": 1.0247505903244019, "rewards/wordcountpos_reward/mean": 4.0978193283081055e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09719127198454829, "rewards/wordcountpos_reward/raw_geo/std": 0.1896013871569092, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1143.0, "completions/max_terminated_length": 1143.0, "completions/mean_length": 1001.4375, "completions/mean_terminated_length": 1001.4375, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.5409081816363273, "frac_reward_zero_std": 0.0, "grad_norm": 2.7405593958200782, "kl": 0.012939453125, "learning_rate": 5.646090854416585e-07, "loss": -0.01, "num_tokens": 118291303.0, "reward": 0.0, "reward_std": 0.4563646912574768, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0889888222078795, "rewards/wordcountpos_reward/raw_geo/std": 0.1523120598144673, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1235.875, "completions/mean_terminated_length": 1235.875, "completions/min_length": 1076.0, "completions/min_terminated_length": 1076.0, "epoch": 0.5411082216443288, "frac_reward_zero_std": 0.0, "grad_norm": 2.8147571707351045, "kl": 0.01495361328125, "learning_rate": 5.642950184463279e-07, "loss": -0.025, "num_tokens": 118337581.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9473732709884644, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026539158918848504, "rewards/wordcountpos_reward/raw_geo/std": 0.07035625647877948, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1076.0, "completions/max_terminated_length": 1076.0, "completions/mean_length": 939.625, "completions/mean_terminated_length": 939.625, "completions/min_length": 677.0, "completions/min_terminated_length": 677.0, "epoch": 0.5413082616523305, "frac_reward_zero_std": 0.0, "grad_norm": 3.822877479571387, "kl": 0.021759033203125, "learning_rate": 5.639809444806814e-07, "loss": -0.0325, "num_tokens": 118382743.0, "reward": 0.0, "reward_std": 0.7992068529129028, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.002809148232020764, "rewards/wordcountpos_reward/raw_geo/std": 0.07688037182784903, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1151.6875, "completions/mean_terminated_length": 1151.6875, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.541508301660332, "frac_reward_zero_std": 0.0, "grad_norm": 3.3739900396086178, "kl": 0.01763916015625, "learning_rate": 5.636668636978629e-07, "loss": 0.0062, "num_tokens": 118425338.0, "reward": 0.0, "reward_std": 0.9275894165039062, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13343473943944842, "rewards/wordcountpos_reward/raw_geo/std": 0.1425868460056315, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15770342536029575, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1117.4375, "completions/mean_terminated_length": 1117.4375, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.5417083416683337, "frac_reward_zero_std": 0.0, "grad_norm": 3.809725194395003, "kl": 0.022491455078125, "learning_rate": 5.633527762510195e-07, "loss": 0.0133, "num_tokens": 118477833.0, "reward": 1.4901161193847656e-08, "reward_std": 1.059481143951416, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03760624941653064, "rewards/wordcountpos_reward/raw_geo/std": 0.12483531858793802, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7, "rewards/wordcountpos_reward/raw_rule/std": 0.190126663432955, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1230.5, "completions/mean_terminated_length": 1230.5, "completions/min_length": 953.0, "completions/min_terminated_length": 953.0, "epoch": 0.5419083816763353, "frac_reward_zero_std": 0.0, "grad_norm": 3.1030506360291428, "kl": 0.0154571533203125, "learning_rate": 5.630386822933019e-07, "loss": 0.0031, "num_tokens": 118517697.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7896203398704529, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07850575402664803, "rewards/wordcountpos_reward/raw_geo/std": 0.05755566028510839, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1312.0625, "completions/mean_terminated_length": 1285.21435546875, "completions/min_length": 1069.0, "completions/min_terminated_length": 1069.0, "epoch": 0.5421084216843368, "frac_reward_zero_std": 0.0, "grad_norm": 2.826267473604537, "kl": 0.0155792236328125, "learning_rate": 5.627245819778635e-07, "loss": 0.0095, "num_tokens": 118567522.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0528076887130737, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07414025049343943, "rewards/wordcountpos_reward/raw_geo/std": 0.07887893286749448, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 1.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1097.0625, "completions/mean_terminated_length": 1097.0625, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.5423084616923385, "frac_reward_zero_std": 0.0, "grad_norm": 3.4037697124335677, "kl": 0.020050048828125, "learning_rate": 5.624104754578612e-07, "loss": -0.0074, "num_tokens": 118610099.0, "reward": 0.0, "reward_std": 0.9156361818313599, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.034358665984815066, "rewards/wordcountpos_reward/raw_geo/std": 0.05777840759582623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1103.4375, "completions/mean_terminated_length": 1103.4375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.54250850170034, "frac_reward_zero_std": 0.0, "grad_norm": 3.4512290689460516, "kl": 0.0182342529296875, "learning_rate": 5.620963628864547e-07, "loss": -0.0194, "num_tokens": 118653618.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0679292678833008, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0036839197692002815, "rewards/wordcountpos_reward/raw_geo/std": 0.11457140068741874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382574, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1188.9375, "completions/mean_terminated_length": 1168.2000732421875, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.5427085417083417, "frac_reward_zero_std": 0.0, "grad_norm": 3.1061957527043567, "kl": 0.018035888671875, "learning_rate": 5.617822444168067e-07, "loss": 0.0065, "num_tokens": 118702641.0, "reward": 5.960464477539063e-08, "reward_std": 0.7717523574829102, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17733304498025995, "rewards/wordcountpos_reward/raw_geo/std": 0.0792382669489115, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1379.3125, "completions/mean_terminated_length": 1339.0833740234375, "completions/min_length": 1118.0, "completions/min_terminated_length": 1118.0, "epoch": 0.5429085817163433, "frac_reward_zero_std": 0.0, "grad_norm": 2.68616851174001, "kl": 0.016876220703125, "learning_rate": 5.614681202020829e-07, "loss": -0.0089, "num_tokens": 118749374.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6152324676513672, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03233620349963368, "rewards/wordcountpos_reward/raw_geo/std": 0.10719177564107593, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337807, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1284.4375, "completions/mean_terminated_length": 1155.0999755859375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.5431086217243448, "frac_reward_zero_std": 0.0, "grad_norm": 3.200784744094138, "kl": 0.018951416015625, "learning_rate": 5.611539903954515e-07, "loss": -0.0142, "num_tokens": 118791661.0, "reward": -1.862645149230957e-08, "reward_std": 1.0296926498413086, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05402496258462227, "rewards/wordcountpos_reward/raw_geo/std": 0.09011445375581113, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1124.5625, "completions/mean_terminated_length": 1124.5625, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.5433086617323465, "frac_reward_zero_std": 0.0, "grad_norm": 2.8088664015083764, "kl": 0.01727294921875, "learning_rate": 5.608398551500838e-07, "loss": -0.0402, "num_tokens": 118836622.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7781370878219604, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.4459269927908598, "rewards/wordcountpos_reward/raw_geo/std": 0.5724788391781455, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045819, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1386.5625, "completions/mean_terminated_length": 1348.75, "completions/min_length": 1178.0, "completions/min_terminated_length": 1178.0, "epoch": 0.543508701740348, "frac_reward_zero_std": 0.0, "grad_norm": 2.793003177211186, "kl": 0.01395416259765625, "learning_rate": 5.605257146191534e-07, "loss": -0.0269, "num_tokens": 118884351.0, "reward": 0.0, "reward_std": 0.6233595013618469, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1021319776906479, "rewards/wordcountpos_reward/raw_geo/std": 0.10829615317439183, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797312, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1157.1875, "completions/mean_terminated_length": 1157.1875, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.5437087417483497, "frac_reward_zero_std": 0.0, "grad_norm": 2.352334059741814, "kl": 0.0137939453125, "learning_rate": 5.60211568955837e-07, "loss": -0.0078, "num_tokens": 118925050.0, "reward": 0.0, "reward_std": 0.8778084516525269, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.18396969231510818, "rewards/wordcountpos_reward/raw_geo/std": 0.19724473268982087, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1179.625, "completions/mean_terminated_length": 1133.857177734375, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.5439087817563513, "frac_reward_zero_std": 0.0, "grad_norm": 3.4885712133643354, "kl": 0.0257568359375, "learning_rate": 5.598974183133129e-07, "loss": -0.0239, "num_tokens": 118974172.0, "reward": 0.0, "reward_std": 0.6158977150917053, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.20413791821577312, "rewards/wordcountpos_reward/raw_geo/std": 0.31845760522489575, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1152.875, "completions/mean_terminated_length": 1129.7333984375, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.5441088217643528, "frac_reward_zero_std": 0.0, "grad_norm": 3.2185756106728856, "kl": 0.019378662109375, "learning_rate": 5.59583262844763e-07, "loss": -0.0875, "num_tokens": 119024354.0, "reward": 0.0, "reward_std": 0.45703521370887756, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12350096224173086, "rewards/wordcountpos_reward/raw_geo/std": 0.08631817996924958, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1027.5, "completions/mean_terminated_length": 1027.5, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.5443088617723545, "frac_reward_zero_std": 0.0, "grad_norm": 3.4157345318859083, "kl": 0.0149078369140625, "learning_rate": 5.592691027033705e-07, "loss": -0.0381, "num_tokens": 119063354.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4208047091960907, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.007524282521867938, "rewards/wordcountpos_reward/raw_geo/std": 0.20100407165760456, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1063.0, "completions/max_terminated_length": 1063.0, "completions/mean_length": 775.375, "completions/mean_terminated_length": 775.375, "completions/min_length": 493.0, "completions/min_terminated_length": 493.0, "epoch": 0.544508901780356, "frac_reward_zero_std": 0.0, "grad_norm": 4.692781495343961, "kl": 0.0167083740234375, "learning_rate": 5.589549380423215e-07, "loss": 0.0353, "num_tokens": 119089400.0, "reward": -1.4901161193847656e-08, "reward_std": 0.950542688369751, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08960603483949144, "rewards/wordcountpos_reward/raw_geo/std": 0.05809422418942121, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 1187.875, "completions/mean_terminated_length": 1115.84619140625, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.5447089417883577, "frac_reward_zero_std": 0.0, "grad_norm": 2.9212443903088747, "kl": 0.01763916015625, "learning_rate": 5.586407690148037e-07, "loss": 0.0185, "num_tokens": 119139734.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9746949672698975, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0167976442575286, "rewards/wordcountpos_reward/raw_geo/std": 0.03454710332828341, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460883, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1182.8125, "completions/mean_terminated_length": 1161.666748046875, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.5449089817963593, "frac_reward_zero_std": 0.0, "grad_norm": 3.0258815585613688, "kl": 0.0130462646484375, "learning_rate": 5.58326595774008e-07, "loss": -0.0577, "num_tokens": 119175291.0, "reward": 0.0, "reward_std": 0.8801112174987793, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1431360053478116, "rewards/wordcountpos_reward/raw_geo/std": 0.2519630749346071, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1229.0, "completions/max_terminated_length": 1229.0, "completions/mean_length": 1019.0, "completions/mean_terminated_length": 1019.0, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.5451090218043608, "frac_reward_zero_std": 0.0, "grad_norm": 2.3824622383361103, "kl": 0.0108795166015625, "learning_rate": 5.580124184731264e-07, "loss": -0.0246, "num_tokens": 119216779.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8155462145805359, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05084104482867015, "rewards/wordcountpos_reward/raw_geo/std": 0.07638491969177258, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1173.375, "completions/mean_terminated_length": 1024.9091796875, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.5453090618123625, "frac_reward_zero_std": 0.0, "grad_norm": 2.8780308418855194, "kl": 0.03155517578125, "learning_rate": 5.576982372653531e-07, "loss": -0.0111, "num_tokens": 119255497.0, "reward": 0.0, "reward_std": 0.5433554649353027, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.056115128553636304, "rewards/wordcountpos_reward/raw_geo/std": 0.09342254213725008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14446581038560774, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1295.5, "completions/mean_terminated_length": 1266.2857666015625, "completions/min_length": 1045.0, "completions/min_terminated_length": 1045.0, "epoch": 0.545509101820364, "frac_reward_zero_std": 0.0, "grad_norm": 2.7192374104005514, "kl": 0.016387939453125, "learning_rate": 5.573840523038843e-07, "loss": 0.0059, "num_tokens": 119295641.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0414891242980957, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09341100794029267, "rewards/wordcountpos_reward/raw_geo/std": 0.10339063965708048, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1201.0625, "completions/mean_terminated_length": 1201.0625, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.5457091418283657, "frac_reward_zero_std": 0.0, "grad_norm": 3.448426461254428, "kl": 0.019073486328125, "learning_rate": 5.570698637419181e-07, "loss": -0.0112, "num_tokens": 119338410.0, "reward": 0.0, "reward_std": 0.3851112127304077, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12929385061383403, "rewards/wordcountpos_reward/raw_geo/std": 0.19900408505624909, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1227.8125, "completions/mean_terminated_length": 1064.5, "completions/min_length": 710.0, "completions/min_terminated_length": 710.0, "epoch": 0.5459091818363673, "frac_reward_zero_std": 0.0, "grad_norm": 3.343391005640543, "kl": 0.024627685546875, "learning_rate": 5.567556717326542e-07, "loss": 0.0079, "num_tokens": 119393895.0, "reward": 0.0, "reward_std": 0.5201990008354187, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03030508852376437, "rewards/wordcountpos_reward/raw_geo/std": 0.06641041161426035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1216.0, "completions/max_terminated_length": 1216.0, "completions/mean_length": 1021.6875, "completions/mean_terminated_length": 1021.6875, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.5461092218443688, "frac_reward_zero_std": 0.0, "grad_norm": 3.728995126583362, "kl": 0.021728515625, "learning_rate": 5.564414764292943e-07, "loss": -0.0052, "num_tokens": 119430482.0, "reward": 0.0, "reward_std": 0.7984132170677185, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.19966205599795975, "rewards/wordcountpos_reward/raw_geo/std": 0.1190600590945957, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0838870492807861, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1110.0, "completions/mean_terminated_length": 1084.0, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.5463092618523705, "frac_reward_zero_std": 0.0, "grad_norm": 2.648329677355547, "kl": 0.0201263427734375, "learning_rate": 5.56127277985041e-07, "loss": -0.0105, "num_tokens": 119473410.0, "reward": 0.0, "reward_std": 0.7949411273002625, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.37293362856833157, "rewards/wordcountpos_reward/raw_geo/std": 0.5221847934918559, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1237.0625, "completions/mean_terminated_length": 1219.533447265625, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.546509301860372, "frac_reward_zero_std": 0.0, "grad_norm": 2.436732628582842, "kl": 0.011871337890625, "learning_rate": 5.558130765530993e-07, "loss": 0.009, "num_tokens": 119518307.0, "reward": 0.0, "reward_std": 0.9311035871505737, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18104796909572815, "rewards/wordcountpos_reward/raw_geo/std": 0.22064725684334435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1180.625, "completions/mean_terminated_length": 1106.923095703125, "completions/min_length": 796.0, "completions/min_terminated_length": 796.0, "epoch": 0.5467093418683737, "frac_reward_zero_std": 0.0, "grad_norm": 3.3773501716585215, "kl": 0.01873779296875, "learning_rate": 5.554988722866749e-07, "loss": -0.0063, "num_tokens": 119566701.0, "reward": 5.960464477539063e-08, "reward_std": 0.8495553731918335, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010131918899781342, "rewards/wordcountpos_reward/raw_geo/std": 0.05723113739770202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792516, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1092.375, "completions/mean_terminated_length": 1092.375, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.5469093818763753, "frac_reward_zero_std": 0.0, "grad_norm": 3.5257232665550635, "kl": 0.019073486328125, "learning_rate": 5.551846653389754e-07, "loss": -0.0159, "num_tokens": 119609675.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0406248569488525, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03581879346660112, "rewards/wordcountpos_reward/raw_geo/std": 0.32776403700241513, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 1058.5, "completions/mean_terminated_length": 1029.0667724609375, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.5471094218843768, "frac_reward_zero_std": 0.0, "grad_norm": 2.6463248188677526, "kl": 0.0158233642578125, "learning_rate": 5.548704558632098e-07, "loss": -0.033, "num_tokens": 119655851.0, "reward": 0.0, "reward_std": 0.4434124231338501, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1539717861439773, "rewards/wordcountpos_reward/raw_geo/std": 0.1479538273583005, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1290.3125, "completions/mean_terminated_length": 1241.923095703125, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.5473094618923785, "frac_reward_zero_std": 0.0, "grad_norm": 3.028638422237495, "kl": 0.01776123046875, "learning_rate": 5.545562440125876e-07, "loss": -0.0288, "num_tokens": 119705976.0, "reward": -1.4901161193847656e-08, "reward_std": 1.015106201171875, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.042763088425018775, "rewards/wordcountpos_reward/raw_geo/std": 0.1235461807862803, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 975.375, "completions/mean_terminated_length": 975.375, "completions/min_length": 561.0, "completions/min_terminated_length": 561.0, "epoch": 0.5475095019003801, "frac_reward_zero_std": 0.0, "grad_norm": 3.3921566772652785, "kl": 0.0159759521484375, "learning_rate": 5.542420299403201e-07, "loss": -0.0475, "num_tokens": 119758486.0, "reward": 4.470348358154297e-08, "reward_std": 0.9721634387969971, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008431740481496853, "rewards/wordcountpos_reward/raw_geo/std": 0.023043267822154295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921948, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1435.6875, "completions/mean_terminated_length": 1328.5, "completions/min_length": 1242.0, "completions/min_terminated_length": 1242.0, "epoch": 0.5477095419083817, "frac_reward_zero_std": 0.0, "grad_norm": 2.7964250026758926, "kl": 0.015350341796875, "learning_rate": 5.539278137996196e-07, "loss": -0.0368, "num_tokens": 119815217.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6598259210586548, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11740464038077841, "rewards/wordcountpos_reward/raw_geo/std": 0.13444843996118785, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1354.125, "completions/mean_terminated_length": 1287.8182373046875, "completions/min_length": 1157.0, "completions/min_terminated_length": 1157.0, "epoch": 0.5479095819163833, "frac_reward_zero_std": 0.0, "grad_norm": 2.788900740855187, "kl": 0.0157470703125, "learning_rate": 5.536135957436992e-07, "loss": -0.0366, "num_tokens": 119857523.0, "reward": 0.0, "reward_std": 0.9920048117637634, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.062406614281463754, "rewards/wordcountpos_reward/raw_geo/std": 0.07566627892772484, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1432.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1166.125, "completions/mean_terminated_length": 1166.125, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.5481096219243848, "frac_reward_zero_std": 0.0, "grad_norm": 2.689198604401958, "kl": 0.0131683349609375, "learning_rate": 5.532993759257734e-07, "loss": 0.0504, "num_tokens": 119896509.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9245504140853882, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03898996468850789, "rewards/wordcountpos_reward/raw_geo/std": 0.16108317561051655, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1140.0625, "completions/mean_terminated_length": 1088.6429443359375, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.5483096619323865, "frac_reward_zero_std": 0.0, "grad_norm": 3.7871345210311094, "kl": 0.0198974609375, "learning_rate": 5.529851544990564e-07, "loss": 0.0376, "num_tokens": 119948342.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7771061658859253, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07091096263338652, "rewards/wordcountpos_reward/raw_geo/std": 0.10447645681813969, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 1099.6875, "completions/mean_terminated_length": 1073.0, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.5485097019403881, "frac_reward_zero_std": 0.0, "grad_norm": 2.6494472208984052, "kl": 0.0136260986328125, "learning_rate": 5.526709316167651e-07, "loss": 0.0005, "num_tokens": 119979169.0, "reward": 0.0, "reward_std": 1.0360790491104126, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07533067112127746, "rewards/wordcountpos_reward/raw_geo/std": 0.039336549819678886, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 1328.0625, "completions/mean_terminated_length": 1041.5, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.5487097419483897, "frac_reward_zero_std": 0.0, "grad_norm": 3.019738974484753, "kl": 0.01495361328125, "learning_rate": 5.523567074321149e-07, "loss": -0.0317, "num_tokens": 120040818.0, "reward": 1.4901161193847656e-08, "reward_std": 1.015598177909851, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06474748545896301, "rewards/wordcountpos_reward/raw_geo/std": 0.16025410274270271, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1210.0, "completions/mean_length": 1199.8125, "completions/mean_terminated_length": 899.625, "completions/min_length": 709.0, "completions/min_terminated_length": 709.0, "epoch": 0.5489097819563913, "frac_reward_zero_std": 0.0, "grad_norm": 3.2681337545130864, "kl": 0.0189208984375, "learning_rate": 5.520424820983237e-07, "loss": -0.0222, "num_tokens": 120084895.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9753825664520264, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12500609666864415, "rewards/wordcountpos_reward/raw_geo/std": 0.22193706195598917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1162.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 986.6875, "completions/mean_terminated_length": 986.6875, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.5491098219643928, "frac_reward_zero_std": 0.0, "grad_norm": 3.2174403111057126, "kl": 0.0166473388671875, "learning_rate": 5.517282557686091e-07, "loss": 0.0005, "num_tokens": 120120354.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9586610794067383, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18998196542895157, "rewards/wordcountpos_reward/raw_geo/std": 0.24295350397683252, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1193.375, "completions/mean_terminated_length": 1172.933349609375, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.5493098619723945, "frac_reward_zero_std": 0.0, "grad_norm": 2.2344935122183953, "kl": 0.0126800537109375, "learning_rate": 5.514140285961889e-07, "loss": 0.0024, "num_tokens": 120159696.0, "reward": 0.0, "reward_std": 0.8424824476242065, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10896917465675861, "rewards/wordcountpos_reward/raw_geo/std": 0.07668052604628561, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1103.25, "completions/mean_terminated_length": 1103.25, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.5495099019803961, "frac_reward_zero_std": 0.0, "grad_norm": 3.2128911989497473, "kl": 0.01806640625, "learning_rate": 5.510998007342819e-07, "loss": -0.0248, "num_tokens": 120199124.0, "reward": 2.9802322387695312e-08, "reward_std": 0.865330696105957, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.048051379397565847, "rewards/wordcountpos_reward/raw_geo/std": 0.05896785949152844, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952264, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 1123.0625, "completions/mean_terminated_length": 1123.0625, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.5497099419883977, "frac_reward_zero_std": 0.0, "grad_norm": 2.195619537609542, "kl": 0.0116424560546875, "learning_rate": 5.507855723361071e-07, "loss": 0.0083, "num_tokens": 120234733.0, "reward": -2.9802322387695312e-08, "reward_std": 0.797008752822876, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04279610695654323, "rewards/wordcountpos_reward/raw_geo/std": 0.03666487861577214, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.1080980350662545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1211.1875, "completions/mean_terminated_length": 986.5555419921875, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.5499099819963993, "frac_reward_zero_std": 0.0, "grad_norm": 3.1456160845637253, "kl": 0.0155792236328125, "learning_rate": 5.504713435548837e-07, "loss": -0.0474, "num_tokens": 120283192.0, "reward": 0.0, "reward_std": 0.7677489519119263, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0007731701535204152, "rewards/wordcountpos_reward/raw_geo/std": 0.2084547108999795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1398411797560202, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1355.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 1143.8125, "completions/mean_terminated_length": 1143.8125, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.5501100220044008, "frac_reward_zero_std": 0.0, "grad_norm": 3.059108790025577, "kl": 0.0126495361328125, "learning_rate": 5.501571145438312e-07, "loss": -0.0155, "num_tokens": 120318989.0, "reward": 0.0, "reward_std": 0.7273299694061279, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17856060356617, "rewards/wordcountpos_reward/raw_geo/std": 0.107794551673357, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1181.1875, "completions/mean_terminated_length": 1181.1875, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.5503100620124025, "frac_reward_zero_std": 0.0, "grad_norm": 2.943885499682246, "kl": 0.0167694091796875, "learning_rate": 5.498428854561689e-07, "loss": -0.0301, "num_tokens": 120361816.0, "reward": 0.0, "reward_std": 0.559146523475647, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16088920852373706, "rewards/wordcountpos_reward/raw_geo/std": 0.24461500252835996, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1334.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 1073.875, "completions/mean_terminated_length": 1073.875, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.5505101020204041, "frac_reward_zero_std": 0.0, "grad_norm": 2.882637793466586, "kl": 0.015869140625, "learning_rate": 5.495286564451161e-07, "loss": -0.0255, "num_tokens": 120405326.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0568236112594604, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13652425227745957, "rewards/wordcountpos_reward/raw_geo/std": 0.11192081633854839, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 1010.4375, "completions/mean_terminated_length": 1010.4375, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.5507101420284057, "frac_reward_zero_std": 0.0, "grad_norm": 3.3552565707580984, "kl": 0.023895263671875, "learning_rate": 5.492144276638929e-07, "loss": -0.0227, "num_tokens": 120448269.0, "reward": -7.450580596923828e-09, "reward_std": 0.9331142902374268, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.013193224623567014, "rewards/wordcountpos_reward/raw_geo/std": 0.08139511422530779, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666115, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1207.5, "completions/mean_terminated_length": 1207.5, "completions/min_length": 1023.0, "completions/min_terminated_length": 1023.0, "epoch": 0.5509101820364073, "frac_reward_zero_std": 0.0, "grad_norm": 3.056603174085287, "kl": 0.01611328125, "learning_rate": 5.48900199265718e-07, "loss": 0.0022, "num_tokens": 120486165.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0035260915756226, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.053624610299787934, "rewards/wordcountpos_reward/raw_geo/std": 0.050189706911464835, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1053.625, "completions/mean_terminated_length": 1053.625, "completions/min_length": 645.0, "completions/min_terminated_length": 645.0, "epoch": 0.5511102220444088, "frac_reward_zero_std": 0.0, "grad_norm": 3.0282358109735577, "kl": 0.017791748046875, "learning_rate": 5.485859714038112e-07, "loss": -0.0026, "num_tokens": 120520431.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8616055250167847, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.037052166329709875, "rewards/wordcountpos_reward/raw_geo/std": 0.16058323714667638, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1337.0625, "completions/mean_terminated_length": 1299.4615478515625, "completions/min_length": 1136.0, "completions/min_terminated_length": 1136.0, "epoch": 0.5513102620524105, "frac_reward_zero_std": 0.0, "grad_norm": 2.524190109202292, "kl": 0.0159454345703125, "learning_rate": 5.48271744231391e-07, "loss": 0.0037, "num_tokens": 120565520.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0147546529769897, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16308541597217321, "rewards/wordcountpos_reward/raw_geo/std": 0.19044824248017803, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.024343224778007377, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1432.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1138.875, "completions/mean_terminated_length": 1138.875, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.5515103020604121, "frac_reward_zero_std": 0.0, "grad_norm": 3.423896286760874, "kl": 0.0158233642578125, "learning_rate": 5.479575179016762e-07, "loss": -0.0031, "num_tokens": 120607758.0, "reward": 0.0, "reward_std": 0.5777809023857117, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23007307859964551, "rewards/wordcountpos_reward/raw_geo/std": 0.07215767625632447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1120.25, "completions/mean_terminated_length": 1120.25, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.5517103420684136, "frac_reward_zero_std": 0.0, "grad_norm": 3.0154070571255756, "kl": 0.019989013671875, "learning_rate": 5.47643292567885e-07, "loss": -0.0422, "num_tokens": 120656002.0, "reward": 7.450580596923828e-09, "reward_std": 1.0415263175964355, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11020292618310133, "rewards/wordcountpos_reward/raw_geo/std": 0.11860266567667559, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1098.0625, "completions/mean_terminated_length": 1098.0625, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.5519103820764153, "frac_reward_zero_std": 0.0, "grad_norm": 3.576897094188133, "kl": 0.020111083984375, "learning_rate": 5.47329068383235e-07, "loss": -0.0491, "num_tokens": 120709243.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9269444942474365, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0023703980466707984, "rewards/wordcountpos_reward/raw_geo/std": 0.10181101774441752, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1136.5625, "completions/mean_terminated_length": 1084.6429443359375, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.5521104220844169, "frac_reward_zero_std": 0.0, "grad_norm": 3.0437314466712864, "kl": 0.0153350830078125, "learning_rate": 5.470148455009433e-07, "loss": -0.0295, "num_tokens": 120759084.0, "reward": -1.862645149230957e-08, "reward_std": 0.9722009897232056, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16949982701874397, "rewards/wordcountpos_reward/raw_geo/std": 0.1731046983569288, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1178.8125, "completions/mean_terminated_length": 1178.8125, "completions/min_length": 587.0, "completions/min_terminated_length": 587.0, "epoch": 0.5523104620924185, "frac_reward_zero_std": 0.0, "grad_norm": 3.690061355725156, "kl": 0.025054931640625, "learning_rate": 5.467006240742267e-07, "loss": -0.0355, "num_tokens": 120812169.0, "reward": 0.0, "reward_std": 0.9404996633529663, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06857835163384486, "rewards/wordcountpos_reward/raw_geo/std": 0.1079587890088948, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1264.375, "completions/mean_terminated_length": 1210.0, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.5525105021004201, "frac_reward_zero_std": 0.0, "grad_norm": 2.8221973825208897, "kl": 0.0162811279296875, "learning_rate": 5.463864042563006e-07, "loss": -0.0327, "num_tokens": 120862519.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8534804582595825, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09497085298148827, "rewards/wordcountpos_reward/raw_geo/std": 0.057979591538145366, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1259.0, "completions/max_terminated_length": 1259.0, "completions/mean_length": 1041.625, "completions/mean_terminated_length": 1041.625, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.5527105421084216, "frac_reward_zero_std": 0.0, "grad_norm": 3.406790910662547, "kl": 0.0174407958984375, "learning_rate": 5.460721862003803e-07, "loss": -0.0302, "num_tokens": 120906809.0, "reward": 4.470348358154297e-08, "reward_std": 1.0358080863952637, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016999647047858765, "rewards/wordcountpos_reward/raw_geo/std": 0.12445808627659549, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1111.3125, "completions/mean_terminated_length": 1085.4000244140625, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.5529105821164233, "frac_reward_zero_std": 0.0, "grad_norm": 3.453479741915102, "kl": 0.023773193359375, "learning_rate": 5.457579700596799e-07, "loss": -0.0219, "num_tokens": 120947230.0, "reward": 3.725290298461914e-09, "reward_std": 1.052828073501587, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.008537167080398613, "rewards/wordcountpos_reward/raw_geo/std": 0.1773712100997574, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.024343224778007377, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1026.1875, "completions/mean_terminated_length": 1026.1875, "completions/min_length": 724.0, "completions/min_terminated_length": 724.0, "epoch": 0.5531106221244249, "frac_reward_zero_std": 0.0, "grad_norm": 2.2337629692794128, "kl": 0.01104736328125, "learning_rate": 5.454437559874125e-07, "loss": 0.0047, "num_tokens": 120980905.0, "reward": 0.0, "reward_std": 0.7917390465736389, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15027597740975976, "rewards/wordcountpos_reward/raw_geo/std": 0.0631267586708266, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1143.625, "completions/mean_terminated_length": 1143.625, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.5533106621324265, "frac_reward_zero_std": 0.0, "grad_norm": 2.649827714580753, "kl": 0.013580322265625, "learning_rate": 5.451295441367902e-07, "loss": 0.0148, "num_tokens": 121032379.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8612791299819946, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11645252760475128, "rewards/wordcountpos_reward/raw_geo/std": 0.07455195212021552, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1384.125, "completions/mean_terminated_length": 1357.3846435546875, "completions/min_length": 1136.0, "completions/min_terminated_length": 1136.0, "epoch": 0.5535107021404281, "frac_reward_zero_std": 0.0, "grad_norm": 2.7360778862322124, "kl": 0.020172119140625, "learning_rate": 5.448153346610246e-07, "loss": -0.0059, "num_tokens": 121074613.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0514402389526367, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.021420226472141368, "rewards/wordcountpos_reward/raw_geo/std": 0.12938092465968315, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1364.3125, "completions/mean_terminated_length": 1319.0833740234375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.5537107421484296, "frac_reward_zero_std": 0.0, "grad_norm": 2.6515188213331724, "kl": 0.014373779296875, "learning_rate": 5.44501127713325e-07, "loss": -0.0211, "num_tokens": 121126074.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0055471658706665, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.104920363668206, "rewards/wordcountpos_reward/raw_geo/std": 0.14932899290343493, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1169.0, "completions/max_terminated_length": 1169.0, "completions/mean_length": 1032.75, "completions/mean_terminated_length": 1032.75, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.5539107821564313, "frac_reward_zero_std": 0.0, "grad_norm": 3.045038132060954, "kl": 0.017974853515625, "learning_rate": 5.441869234469008e-07, "loss": -0.0278, "num_tokens": 121169198.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8099002838134766, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05418939559820554, "rewards/wordcountpos_reward/raw_geo/std": 0.17550428122667738, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1260.0, "completions/max_terminated_length": 1260.0, "completions/mean_length": 1060.0625, "completions/mean_terminated_length": 1060.0625, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.5541108221644329, "frac_reward_zero_std": 0.0, "grad_norm": 2.9174974024571285, "kl": 0.01495361328125, "learning_rate": 5.43872722014959e-07, "loss": -0.0191, "num_tokens": 121206439.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0137572288513184, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06290786199829485, "rewards/wordcountpos_reward/raw_geo/std": 0.05247630319407801, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.16510378329783743, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1337.4375, "completions/mean_terminated_length": 1263.5455322265625, "completions/min_length": 1088.0, "completions/min_terminated_length": 1088.0, "epoch": 0.5543108621724345, "frac_reward_zero_std": 0.0, "grad_norm": 2.656221660331925, "kl": 0.014923095703125, "learning_rate": 5.435585235707059e-07, "loss": -0.0219, "num_tokens": 121257278.0, "reward": 0.0, "reward_std": 0.8689007759094238, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06283533959660798, "rewards/wordcountpos_reward/raw_geo/std": 0.08497528714736785, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 1315.9375, "completions/mean_terminated_length": 1131.875, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.5545109021804361, "frac_reward_zero_std": 0.0, "grad_norm": 2.456185579282509, "kl": 0.01300048828125, "learning_rate": 5.432443282673455e-07, "loss": 0.0124, "num_tokens": 121304989.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9902583360671997, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004566777999558737, "rewards/wordcountpos_reward/raw_geo/std": 0.0844416043916669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1152.25, "completions/mean_terminated_length": 1129.0667724609375, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.5547109421884376, "frac_reward_zero_std": 0.0, "grad_norm": 3.1470896614442903, "kl": 0.020538330078125, "learning_rate": 5.429301362580819e-07, "loss": 0.0308, "num_tokens": 121352897.0, "reward": 0.0, "reward_std": 0.8391436338424683, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09046463153235919, "rewards/wordcountpos_reward/raw_geo/std": 0.09614105106845652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1197.5625, "completions/mean_terminated_length": 1154.357177734375, "completions/min_length": 696.0, "completions/min_terminated_length": 696.0, "epoch": 0.5549109821964393, "frac_reward_zero_std": 0.0, "grad_norm": 3.0279032017615357, "kl": 0.016357421875, "learning_rate": 5.426159476961156e-07, "loss": -0.0123, "num_tokens": 121407730.0, "reward": -5.960464477539063e-08, "reward_std": 0.3802091181278229, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09696882474618271, "rewards/wordcountpos_reward/raw_geo/std": 0.18606823514442317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.1275843947266976, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1393.0625, "completions/mean_terminated_length": 1344.45458984375, "completions/min_length": 1118.0, "completions/min_terminated_length": 1118.0, "epoch": 0.5551110222044409, "frac_reward_zero_std": 0.0, "grad_norm": 2.6675112704396935, "kl": 0.017120361328125, "learning_rate": 5.423017627346469e-07, "loss": -0.0346, "num_tokens": 121455243.0, "reward": 5.960464477539063e-08, "reward_std": 0.41478779911994934, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12963545442202487, "rewards/wordcountpos_reward/raw_geo/std": 0.14206197924663674, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1352.25, "completions/mean_terminated_length": 1303.0, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.5553110622124425, "frac_reward_zero_std": 0.0, "grad_norm": 3.0243521816135313, "kl": 0.017852783203125, "learning_rate": 5.419875815268736e-07, "loss": 0.0301, "num_tokens": 121505943.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0668723583221436, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.048598353149695885, "rewards/wordcountpos_reward/raw_geo/std": 0.10558616570688957, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1039.1875, "completions/mean_terminated_length": 973.357177734375, "completions/min_length": 615.0, "completions/min_terminated_length": 615.0, "epoch": 0.5555111022204441, "frac_reward_zero_std": 0.0, "grad_norm": 3.036443418451255, "kl": 0.0173187255859375, "learning_rate": 5.416734042259919e-07, "loss": -0.0504, "num_tokens": 121560586.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7054977416992188, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2105963516170963, "rewards/wordcountpos_reward/raw_geo/std": 0.31905605126377684, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1072.0, "completions/max_terminated_length": 1072.0, "completions/mean_length": 879.625, "completions/mean_terminated_length": 879.625, "completions/min_length": 688.0, "completions/min_terminated_length": 688.0, "epoch": 0.5557111422284456, "frac_reward_zero_std": 0.0, "grad_norm": 3.7471036472529935, "kl": 0.018951416015625, "learning_rate": 5.413592309851963e-07, "loss": 0.0233, "num_tokens": 121591068.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9735217094421387, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12920448901643408, "rewards/wordcountpos_reward/raw_geo/std": 0.11724313375773425, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1159.0, "completions/max_terminated_length": 1159.0, "completions/mean_length": 957.0, "completions/mean_terminated_length": 957.0, "completions/min_length": 796.0, "completions/min_terminated_length": 796.0, "epoch": 0.5559111822364473, "frac_reward_zero_std": 0.0, "grad_norm": 3.6476745216039013, "kl": 0.0173187255859375, "learning_rate": 5.410450619576786e-07, "loss": 0.0022, "num_tokens": 121618684.0, "reward": 0.0, "reward_std": 0.7262996435165405, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05286077475590997, "rewards/wordcountpos_reward/raw_geo/std": 0.055015888135849664, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382574, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1160.6875, "completions/mean_terminated_length": 1160.6875, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.5561112222444489, "frac_reward_zero_std": 0.0, "grad_norm": 3.233615925262948, "kl": 0.018280029296875, "learning_rate": 5.407308972966297e-07, "loss": 0.0239, "num_tokens": 121657607.0, "reward": 2.2351741790771484e-08, "reward_std": 1.045502781867981, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04952518477484266, "rewards/wordcountpos_reward/raw_geo/std": 0.055248356766962534, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1215.0, "completions/mean_terminated_length": 1149.2308349609375, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.5563112622524505, "frac_reward_zero_std": 0.0, "grad_norm": 2.826054382696458, "kl": 0.01373291015625, "learning_rate": 5.404167371552371e-07, "loss": -0.0683, "num_tokens": 121710575.0, "reward": 0.0, "reward_std": 0.9951690435409546, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06555775342682137, "rewards/wordcountpos_reward/raw_geo/std": 0.06933942385215623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1109.5625, "completions/mean_terminated_length": 979.4166870117188, "completions/min_length": 734.0, "completions/min_terminated_length": 734.0, "epoch": 0.5565113022604521, "frac_reward_zero_std": 0.0, "grad_norm": 3.0024352489259125, "kl": 0.014434814453125, "learning_rate": 5.401025816866871e-07, "loss": -0.0547, "num_tokens": 121753424.0, "reward": 0.0, "reward_std": 0.8346893191337585, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05974050022641233, "rewards/wordcountpos_reward/raw_geo/std": 0.12158342417987907, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1276.5, "completions/mean_terminated_length": 1142.4000244140625, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.5567113422684536, "frac_reward_zero_std": 0.0, "grad_norm": 2.8029311190752035, "kl": 0.01531982421875, "learning_rate": 5.397884310441631e-07, "loss": -0.0304, "num_tokens": 121799056.0, "reward": 0.0, "reward_std": 1.037974238395691, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03825509160018572, "rewards/wordcountpos_reward/raw_geo/std": 0.05023974856856543, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1238.5625, "completions/mean_terminated_length": 1151.416748046875, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.5569113822764553, "frac_reward_zero_std": 0.0, "grad_norm": 2.724484206760618, "kl": 0.017425537109375, "learning_rate": 5.394742853808466e-07, "loss": 0.0046, "num_tokens": 121838569.0, "reward": 2.9802322387695312e-08, "reward_std": 0.870316207408905, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03176950726000048, "rewards/wordcountpos_reward/raw_geo/std": 0.0695363908001046, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1192.5625, "completions/mean_terminated_length": 1192.5625, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.5571114222844569, "frac_reward_zero_std": 0.0, "grad_norm": 2.1003933061784554, "kl": 0.0148162841796875, "learning_rate": 5.391601448499163e-07, "loss": -0.0021, "num_tokens": 121885850.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0628564357757568, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03332135549750331, "rewards/wordcountpos_reward/raw_geo/std": 0.05955047397986795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1260.0, "completions/mean_length": 1243.6875, "completions/mean_terminated_length": 1158.25, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.5573114622924585, "frac_reward_zero_std": 0.0, "grad_norm": 2.269219111841901, "kl": 0.016448974609375, "learning_rate": 5.388460096045485e-07, "loss": 0.0331, "num_tokens": 121935749.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9977157115936279, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.4040354639185961, "rewards/wordcountpos_reward/raw_geo/std": 0.12304755651538501, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1139.5, "completions/mean_terminated_length": 1115.4666748046875, "completions/min_length": 739.0, "completions/min_terminated_length": 739.0, "epoch": 0.5575115023004601, "frac_reward_zero_std": 0.0, "grad_norm": 3.625563896890709, "kl": 0.02001953125, "learning_rate": 5.385318797979172e-07, "loss": -0.0457, "num_tokens": 121977309.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9654500484466553, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.037437647614799334, "rewards/wordcountpos_reward/raw_geo/std": 0.264565668724117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1230.6875, "completions/mean_terminated_length": 1212.7333984375, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.5577115423084616, "frac_reward_zero_std": 0.0, "grad_norm": 2.5492479739886527, "kl": 0.013946533203125, "learning_rate": 5.382177555831934e-07, "loss": -0.0018, "num_tokens": 122024000.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0152817964553833, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08816517169520995, "rewards/wordcountpos_reward/raw_geo/std": 0.09195814739930336, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402216, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1194.75, "completions/mean_terminated_length": 1124.3077392578125, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.5579115823164633, "frac_reward_zero_std": 0.0, "grad_norm": 3.348305658735377, "kl": 0.01922607421875, "learning_rate": 5.379036371135453e-07, "loss": -0.0243, "num_tokens": 122068972.0, "reward": 4.470348358154297e-08, "reward_std": 1.0584721565246582, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1621819530956879, "rewards/wordcountpos_reward/raw_geo/std": 0.10329181797184754, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1188.0, "completions/max_terminated_length": 1188.0, "completions/mean_length": 1046.1875, "completions/mean_terminated_length": 1046.1875, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.5581116223244649, "frac_reward_zero_std": 0.0, "grad_norm": 3.583397530714282, "kl": 0.0186767578125, "learning_rate": 5.375895245421389e-07, "loss": 0.0066, "num_tokens": 122112815.0, "reward": 0.0, "reward_std": 0.7243232727050781, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04169706393356827, "rewards/wordcountpos_reward/raw_geo/std": 0.05347103158532924, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.12583057392117916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1132.75, "completions/mean_terminated_length": 1132.75, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.5583116623324665, "frac_reward_zero_std": 0.0, "grad_norm": 3.1200417082860072, "kl": 0.018890380859375, "learning_rate": 5.372754180221366e-07, "loss": -0.0066, "num_tokens": 122161555.0, "reward": -2.9802322387695312e-08, "reward_std": 0.872317910194397, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05033754641260486, "rewards/wordcountpos_reward/raw_geo/std": 0.0574237450901158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1323.0, "completions/max_terminated_length": 1323.0, "completions/mean_length": 1089.6875, "completions/mean_terminated_length": 1089.6875, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.5585117023404681, "frac_reward_zero_std": 0.0, "grad_norm": 3.193578236857476, "kl": 0.0180816650390625, "learning_rate": 5.369613177066983e-07, "loss": 0.0068, "num_tokens": 122198886.0, "reward": 0.0, "reward_std": 0.8207800388336182, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10396272511014984, "rewards/wordcountpos_reward/raw_geo/std": 0.07750865183364884, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1235.9375, "completions/mean_terminated_length": 1235.9375, "completions/min_length": 1101.0, "completions/min_terminated_length": 1101.0, "epoch": 0.5587117423484697, "frac_reward_zero_std": 0.0, "grad_norm": 3.4607668371138023, "kl": 0.019744873046875, "learning_rate": 5.366472237489805e-07, "loss": -0.0256, "num_tokens": 122239421.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9992342591285706, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.020338832159167826, "rewards/wordcountpos_reward/raw_geo/std": 0.06324899201839994, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1010.6875, "completions/mean_terminated_length": 978.0667114257812, "completions/min_length": 660.0, "completions/min_terminated_length": 660.0, "epoch": 0.5589117823564713, "frac_reward_zero_std": 0.0, "grad_norm": 3.2665193551105416, "kl": 0.0148468017578125, "learning_rate": 5.363331363021373e-07, "loss": -0.0432, "num_tokens": 122274864.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0254037380218506, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.045773540499057595, "rewards/wordcountpos_reward/raw_geo/std": 0.07392085000068642, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1076.4375, "completions/mean_terminated_length": 1076.4375, "completions/min_length": 779.0, "completions/min_terminated_length": 779.0, "epoch": 0.5591118223644729, "frac_reward_zero_std": 0.0, "grad_norm": 3.055631965077337, "kl": 0.0167236328125, "learning_rate": 5.360190555193187e-07, "loss": -0.032, "num_tokens": 122317071.0, "reward": 0.0, "reward_std": 0.6155973672866821, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11349773294029099, "rewards/wordcountpos_reward/raw_geo/std": 0.21483900309418416, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1341.1875, "completions/mean_terminated_length": 1341.1875, "completions/min_length": 1205.0, "completions/min_terminated_length": 1205.0, "epoch": 0.5593118623724745, "frac_reward_zero_std": 0.0, "grad_norm": 2.7358236919113756, "kl": 0.0148468017578125, "learning_rate": 5.357049815536723e-07, "loss": -0.0007, "num_tokens": 122367914.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6584945917129517, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03562494311472758, "rewards/wordcountpos_reward/raw_geo/std": 0.1582065322997454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1119.0625, "completions/mean_terminated_length": 1064.6429443359375, "completions/min_length": 755.0, "completions/min_terminated_length": 755.0, "epoch": 0.5595119023804761, "frac_reward_zero_std": 0.0, "grad_norm": 2.7081346732192326, "kl": 0.01458740234375, "learning_rate": 5.353909145583415e-07, "loss": 0.0501, "num_tokens": 122399115.0, "reward": 0.0, "reward_std": 0.8230560421943665, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14190498930120032, "rewards/wordcountpos_reward/raw_geo/std": 0.16481110094231718, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 1000.8125, "completions/mean_terminated_length": 967.5333862304688, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.5597119423884777, "frac_reward_zero_std": 0.0, "grad_norm": 3.324453840164413, "kl": 0.01245880126953125, "learning_rate": 5.350768546864674e-07, "loss": 0.0216, "num_tokens": 122438128.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7401484251022339, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11415176540574203, "rewards/wordcountpos_reward/raw_geo/std": 0.1632288694986641, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.05962847939999442, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1087.25, "completions/mean_terminated_length": 1087.25, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.5599119823964793, "frac_reward_zero_std": 0.0, "grad_norm": 3.9263849651909757, "kl": 0.023040771484375, "learning_rate": 5.347628020911865e-07, "loss": -0.051, "num_tokens": 122483452.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5256354808807373, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014714821017664918, "rewards/wordcountpos_reward/raw_geo/std": 0.14245971435999794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1473.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1044.9375, "completions/mean_terminated_length": 1044.9375, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.5601120224044809, "frac_reward_zero_std": 0.0, "grad_norm": 1.9292580354358833, "kl": 0.00797271728515625, "learning_rate": 5.344487569256327e-07, "loss": 0.0101, "num_tokens": 122524227.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0198633670806885, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1435606039013041, "rewards/wordcountpos_reward/raw_geo/std": 0.09121877948776455, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 1063.4375, "completions/mean_terminated_length": 1063.4375, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.5603120624124825, "frac_reward_zero_std": 0.0, "grad_norm": 2.851656492620111, "kl": 0.0151214599609375, "learning_rate": 5.341347193429354e-07, "loss": -0.0243, "num_tokens": 122565186.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0035573244094849, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05911713504649769, "rewards/wordcountpos_reward/raw_geo/std": 0.04609914443306723, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1439521525445946, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1309.0625, "completions/mean_terminated_length": 1281.7857666015625, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.5605121024204841, "frac_reward_zero_std": 0.0, "grad_norm": 3.18269866741104, "kl": 0.016998291015625, "learning_rate": 5.338206894962213e-07, "loss": 0.0064, "num_tokens": 122621043.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9486033916473389, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1939212382905569, "rewards/wordcountpos_reward/raw_geo/std": 0.09933622726680856, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1262.1875, "completions/mean_terminated_length": 1246.3333740234375, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.5607121424284857, "frac_reward_zero_std": 0.0, "grad_norm": 2.860758909546288, "kl": 0.014678955078125, "learning_rate": 5.335066675386122e-07, "loss": -0.0113, "num_tokens": 122670774.0, "reward": 0.0, "reward_std": 0.6976358294487, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09520780852484419, "rewards/wordcountpos_reward/raw_geo/std": 0.07862129472120519, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1123.125, "completions/mean_terminated_length": 1123.125, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.5609121824364873, "frac_reward_zero_std": 0.0, "grad_norm": 3.4726106659983507, "kl": 0.021697998046875, "learning_rate": 5.331926536232271e-07, "loss": -0.0027, "num_tokens": 122721872.0, "reward": 7.450580596923828e-09, "reward_std": 1.018176794052124, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.010832357136047457, "rewards/wordcountpos_reward/raw_geo/std": 0.1285773052566421, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1166.625, "completions/mean_terminated_length": 1144.4000244140625, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.5611122224444889, "frac_reward_zero_std": 0.0, "grad_norm": 2.6265578592983454, "kl": 0.0156707763671875, "learning_rate": 5.328786479031802e-07, "loss": 0.0417, "num_tokens": 122767090.0, "reward": 7.450580596923828e-09, "reward_std": 1.0597172975540161, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.029454632961181115, "rewards/wordcountpos_reward/raw_geo/std": 0.057871483189472786, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1157.8125, "completions/mean_terminated_length": 1135.0, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.5613122624524906, "frac_reward_zero_std": 0.0, "grad_norm": 2.970701918690622, "kl": 0.0158233642578125, "learning_rate": 5.325646505315825e-07, "loss": -0.0289, "num_tokens": 122797559.0, "reward": -1.4901161193847656e-08, "reward_std": 1.062283992767334, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01470146022742564, "rewards/wordcountpos_reward/raw_geo/std": 0.03234783455608466, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1294.5625, "completions/mean_terminated_length": 1201.181884765625, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.5615123024604921, "frac_reward_zero_std": 0.0, "grad_norm": 2.6137701611563364, "kl": 0.0154876708984375, "learning_rate": 5.322506616615403e-07, "loss": 0.0118, "num_tokens": 122848632.0, "reward": 0.0, "reward_std": 0.7274706363677979, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09897501602450609, "rewards/wordcountpos_reward/raw_geo/std": 0.05843326206100703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252809, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1066.5625, "completions/mean_terminated_length": 1037.666748046875, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.5617123424684937, "frac_reward_zero_std": 0.0, "grad_norm": 3.410731114029286, "kl": 0.019317626953125, "learning_rate": 5.319366814461561e-07, "loss": -0.1338, "num_tokens": 122891361.0, "reward": 1.862645149230957e-08, "reward_std": 1.0122787952423096, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00513953871505112, "rewards/wordcountpos_reward/raw_geo/std": 0.1005198318520564, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1326.75, "completions/mean_terminated_length": 1286.769287109375, "completions/min_length": 1023.0, "completions/min_terminated_length": 1023.0, "epoch": 0.5619123824764953, "frac_reward_zero_std": 0.0, "grad_norm": 3.4546133331115794, "kl": 0.022308349609375, "learning_rate": 5.31622710038528e-07, "loss": 0.0157, "num_tokens": 122949053.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8338626623153687, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04103595667411785, "rewards/wordcountpos_reward/raw_geo/std": 0.10698115882923663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639735, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1236.375, "completions/mean_terminated_length": 1175.5384521484375, "completions/min_length": 988.0, "completions/min_terminated_length": 988.0, "epoch": 0.5621124224844969, "frac_reward_zero_std": 0.0, "grad_norm": 3.1068968858131867, "kl": 0.018157958984375, "learning_rate": 5.3130874759175e-07, "loss": -0.0025, "num_tokens": 122993579.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7160733938217163, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.029592689392502487, "rewards/wordcountpos_reward/raw_geo/std": 0.05501854452538951, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1220.1875, "completions/mean_terminated_length": 1201.533447265625, "completions/min_length": 1078.0, "completions/min_terminated_length": 1078.0, "epoch": 0.5623124624924986, "frac_reward_zero_std": 0.0, "grad_norm": 2.8556774888397625, "kl": 0.01568603515625, "learning_rate": 5.309947942589114e-07, "loss": -0.0102, "num_tokens": 123036302.0, "reward": 0.0, "reward_std": 0.6155783534049988, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04401716076613279, "rewards/wordcountpos_reward/raw_geo/std": 0.0869048695491323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1342.8125, "completions/mean_terminated_length": 1271.3636474609375, "completions/min_length": 1089.0, "completions/min_terminated_length": 1089.0, "epoch": 0.5625125025005001, "frac_reward_zero_std": 0.0, "grad_norm": 3.031620248477741, "kl": 0.0163421630859375, "learning_rate": 5.306808501930977e-07, "loss": 0.0241, "num_tokens": 123084003.0, "reward": 2.2351741790771484e-08, "reward_std": 0.9935848712921143, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06636721305520435, "rewards/wordcountpos_reward/raw_geo/std": 0.06433370166964675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1382.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1108.5625, "completions/mean_terminated_length": 1108.5625, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.5627125425085017, "frac_reward_zero_std": 0.0, "grad_norm": 3.255819946070304, "kl": 0.0203857421875, "learning_rate": 5.30366915547389e-07, "loss": -0.001, "num_tokens": 123130988.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9951785802841187, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0016919241035241656, "rewards/wordcountpos_reward/raw_geo/std": 0.0067676964140966625, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1117.0, "completions/mean_terminated_length": 1117.0, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.5629125825165033, "frac_reward_zero_std": 0.0, "grad_norm": 3.131387953405485, "kl": 0.01776123046875, "learning_rate": 5.300529904748616e-07, "loss": -0.0108, "num_tokens": 123172492.0, "reward": -7.450580596923828e-09, "reward_std": 1.0362052917480469, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.14473438399701596, "rewards/wordcountpos_reward/raw_geo/std": 0.048671691854633166, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1048.75, "completions/mean_terminated_length": 1048.75, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.5631126225245049, "frac_reward_zero_std": 0.0, "grad_norm": 3.409456679868988, "kl": 0.0172882080078125, "learning_rate": 5.297390751285863e-07, "loss": 0.0166, "num_tokens": 123224688.0, "reward": 0.0, "reward_std": 0.8581966161727905, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06318790867045163, "rewards/wordcountpos_reward/raw_geo/std": 0.06841829472288084, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1006.0625, "completions/mean_terminated_length": 1006.0625, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.5633126625325064, "frac_reward_zero_std": 0.0, "grad_norm": 3.3173886688948437, "kl": 0.0167694091796875, "learning_rate": 5.294251696616302e-07, "loss": -0.0186, "num_tokens": 123262761.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8060303926467896, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0369195043706377, "rewards/wordcountpos_reward/raw_geo/std": 0.08856019794529271, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194862, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1234.75, "completions/mean_terminated_length": 1173.5384521484375, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.5635127025405081, "frac_reward_zero_std": 0.0, "grad_norm": 3.5606437640209476, "kl": 0.02984619140625, "learning_rate": 5.291112742270547e-07, "loss": 0.0291, "num_tokens": 123308477.0, "reward": 0.0, "reward_std": 0.6397988796234131, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05450023324938344, "rewards/wordcountpos_reward/raw_geo/std": 0.12242645833993934, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1026.0, "completions/mean_terminated_length": 1026.0, "completions/min_length": 784.0, "completions/min_terminated_length": 784.0, "epoch": 0.5637127425485097, "frac_reward_zero_std": 0.0, "grad_norm": 3.3519241038129968, "kl": 0.015411376953125, "learning_rate": 5.287973889779168e-07, "loss": -0.0439, "num_tokens": 123343981.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0396840572357178, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0346153340123795, "rewards/wordcountpos_reward/raw_geo/std": 0.027584621618184067, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1270.875, "completions/mean_terminated_length": 1238.1429443359375, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.5639127825565113, "frac_reward_zero_std": 0.0, "grad_norm": 2.8946499349057744, "kl": 0.0146484375, "learning_rate": 5.284835140672682e-07, "loss": -0.001, "num_tokens": 123396243.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8773537278175354, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009623125235701704, "rewards/wordcountpos_reward/raw_geo/std": 0.09279665804345034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14900907255500823, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1224.75, "completions/mean_terminated_length": 1185.4285888671875, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.5641128225645129, "frac_reward_zero_std": 0.0, "grad_norm": 2.9272249215123374, "kl": 0.0182342529296875, "learning_rate": 5.281696496481561e-07, "loss": 0.0128, "num_tokens": 123446359.0, "reward": 1.4901161193847656e-08, "reward_std": 1.030010461807251, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14446656954785325, "rewards/wordcountpos_reward/raw_geo/std": 0.054741164367316394, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1047.125, "completions/mean_terminated_length": 1016.9334106445312, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.5643128625725145, "frac_reward_zero_std": 0.0, "grad_norm": 3.1181321700098543, "kl": 0.0192718505859375, "learning_rate": 5.278557958736216e-07, "loss": -0.0265, "num_tokens": 123480169.0, "reward": 0.0, "reward_std": 1.0611028671264648, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1949092912368235, "rewards/wordcountpos_reward/raw_geo/std": 0.06281981891523235, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1328.75, "completions/mean_terminated_length": 1271.666748046875, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.5645129025805161, "frac_reward_zero_std": 0.0, "grad_norm": 3.168015789587999, "kl": 0.01593017578125, "learning_rate": 5.27541952896702e-07, "loss": -0.0226, "num_tokens": 123525349.0, "reward": 0.0, "reward_std": 0.9607175588607788, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12555723340076513, "rewards/wordcountpos_reward/raw_geo/std": 0.1083772649197249, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1094.4375, "completions/mean_terminated_length": 1094.4375, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.5647129425885177, "frac_reward_zero_std": 0.0, "grad_norm": 3.4603253745995675, "kl": 0.0164031982421875, "learning_rate": 5.272281208704277e-07, "loss": -0.0129, "num_tokens": 123569924.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7486159801483154, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15791202391847317, "rewards/wordcountpos_reward/raw_geo/std": 0.2024653265046756, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1211.0, "completions/mean_length": 1074.125, "completions/mean_terminated_length": 1045.7333984375, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.5649129825965193, "frac_reward_zero_std": 0.0, "grad_norm": 3.4677595980533438, "kl": 0.016357421875, "learning_rate": 5.269142999478254e-07, "loss": -0.0287, "num_tokens": 123621798.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9968963861465454, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11493642962341274, "rewards/wordcountpos_reward/raw_geo/std": 0.046315983673525386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1212.1875, "completions/mean_terminated_length": 1193.0001220703125, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.5651130226045209, "frac_reward_zero_std": 0.0, "grad_norm": 3.1832022604788373, "kl": 0.0155487060546875, "learning_rate": 5.266004902819153e-07, "loss": 0.0207, "num_tokens": 123661121.0, "reward": 0.0, "reward_std": 0.6362428665161133, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0584882986410087, "rewards/wordcountpos_reward/raw_geo/std": 0.06214886129744758, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1254.6875, "completions/mean_terminated_length": 1219.6429443359375, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.5653130626125225, "frac_reward_zero_std": 0.0, "grad_norm": 3.2071995615187494, "kl": 0.0166015625, "learning_rate": 5.262866920257122e-07, "loss": 0.0177, "num_tokens": 123695492.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9034401774406433, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.033216229177993065, "rewards/wordcountpos_reward/raw_geo/std": 0.12152436573500844, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1172.0, "completions/max_terminated_length": 1172.0, "completions/mean_length": 1038.125, "completions/mean_terminated_length": 1038.125, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.5655131026205241, "frac_reward_zero_std": 0.0, "grad_norm": 1.444873532436064, "kl": 0.0067653656005859375, "learning_rate": 5.259729053322258e-07, "loss": -0.0286, "num_tokens": 123731686.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9429427981376648, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07739332459482763, "rewards/wordcountpos_reward/raw_geo/std": 0.0725297289454433, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1166.25, "completions/mean_terminated_length": 1144.0, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.5657131426285257, "frac_reward_zero_std": 0.0, "grad_norm": 2.8437608523387263, "kl": 0.01324462890625, "learning_rate": 5.2565913035446e-07, "loss": -0.0508, "num_tokens": 123766170.0, "reward": 0.0, "reward_std": 0.8199436664581299, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19372635582278686, "rewards/wordcountpos_reward/raw_geo/std": 0.1523155603277147, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1090.875, "completions/mean_terminated_length": 1090.875, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.5659131826365273, "frac_reward_zero_std": 0.0, "grad_norm": 3.1020245735367173, "kl": 0.017303466796875, "learning_rate": 5.253453672454126e-07, "loss": -0.0358, "num_tokens": 123808176.0, "reward": 0.0, "reward_std": 0.9610136151313782, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07484874101803551, "rewards/wordcountpos_reward/raw_geo/std": 0.1785947244275245, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1300.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1103.0, "completions/mean_terminated_length": 1103.0, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.5661132226445289, "frac_reward_zero_std": 0.0, "grad_norm": 3.185538052764283, "kl": 0.01922607421875, "learning_rate": 5.250316161580761e-07, "loss": -0.0161, "num_tokens": 123849248.0, "reward": 0.0, "reward_std": 0.8116010427474976, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.024221623749607873, "rewards/wordcountpos_reward/raw_geo/std": 0.08204362165273758, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1278.8125, "completions/mean_terminated_length": 1264.0667724609375, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.5663132626525305, "frac_reward_zero_std": 0.0, "grad_norm": 3.2590520092700355, "kl": 0.0201416015625, "learning_rate": 5.247178772454369e-07, "loss": -0.0009, "num_tokens": 123888269.0, "reward": 0.0, "reward_std": 1.016416311264038, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.001152302241801682, "rewards/wordcountpos_reward/raw_geo/std": 0.1530386095992914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1014.0, "completions/mean_length": 1197.1875, "completions/mean_terminated_length": 894.375, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.5665133026605321, "frac_reward_zero_std": 0.0, "grad_norm": 2.7141250038375655, "kl": 0.01336669921875, "learning_rate": 5.244041506604758e-07, "loss": 0.0027, "num_tokens": 123941392.0, "reward": 5.960464477539063e-08, "reward_std": 0.5063736438751221, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.26055279928362524, "rewards/wordcountpos_reward/raw_geo/std": 0.2844207786833347, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1158.375, "completions/mean_terminated_length": 1158.375, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.5667133426685337, "frac_reward_zero_std": 0.0, "grad_norm": 2.785603928641754, "kl": 0.0123748779296875, "learning_rate": 5.240904365561669e-07, "loss": -0.0523, "num_tokens": 123982350.0, "reward": 0.0, "reward_std": 0.7677541971206665, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16730825700303967, "rewards/wordcountpos_reward/raw_geo/std": 0.055762644087533994, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1351.3125, "completions/mean_terminated_length": 1283.727294921875, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.5669133826765353, "frac_reward_zero_std": 0.0, "grad_norm": 3.0694002167635572, "kl": 0.021270751953125, "learning_rate": 5.237767350854788e-07, "loss": 0.0344, "num_tokens": 124036195.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0597902536392212, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16086902963330815, "rewards/wordcountpos_reward/raw_geo/std": 0.08701703223417126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14907119849998599, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1115.3125, "completions/mean_terminated_length": 1115.3125, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.5671134226845369, "frac_reward_zero_std": 0.0, "grad_norm": 3.1791055326131126, "kl": 0.017425537109375, "learning_rate": 5.234630464013737e-07, "loss": -0.0229, "num_tokens": 124069704.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7970287799835205, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03993397849689377, "rewards/wordcountpos_reward/raw_geo/std": 0.047476470894573, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1167.5, "completions/mean_terminated_length": 1167.5, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.5673134626925385, "frac_reward_zero_std": 0.0, "grad_norm": 2.705334437180358, "kl": 0.014129638671875, "learning_rate": 5.231493706568077e-07, "loss": 0.0126, "num_tokens": 124109104.0, "reward": 0.0, "reward_std": 0.5094152092933655, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01790750318497076, "rewards/wordcountpos_reward/raw_geo/std": 0.13801889470881637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1326.375, "completions/mean_terminated_length": 1152.75, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.5675135027005401, "frac_reward_zero_std": 0.0, "grad_norm": 2.2684566739543564, "kl": 0.012176513671875, "learning_rate": 5.228357080047306e-07, "loss": -0.0292, "num_tokens": 124160406.0, "reward": 0.0, "reward_std": 0.7170937657356262, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.057244996231410834, "rewards/wordcountpos_reward/raw_geo/std": 0.12763068098441113, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14801151106386087, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1134.6875, "completions/mean_terminated_length": 1134.6875, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.5677135427085417, "frac_reward_zero_std": 0.0, "grad_norm": 3.5216058952170552, "kl": 0.020050048828125, "learning_rate": 5.225220585980855e-07, "loss": -0.022, "num_tokens": 124201409.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8921395540237427, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02764675809170321, "rewards/wordcountpos_reward/raw_geo/std": 0.05336102770324488, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1441.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1199.0625, "completions/mean_terminated_length": 1199.0625, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.5679135827165434, "frac_reward_zero_std": 0.0, "grad_norm": 3.0262874922279086, "kl": 0.01531982421875, "learning_rate": 5.222084225898096e-07, "loss": 0.0144, "num_tokens": 124247826.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9933990240097046, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0225961131319504, "rewards/wordcountpos_reward/raw_geo/std": 0.12313955161759517, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1286.25, "completions/mean_terminated_length": 1215.0, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.5681136227245449, "frac_reward_zero_std": 0.0, "grad_norm": 2.9639196494232363, "kl": 0.0164337158203125, "learning_rate": 5.218948001328327e-07, "loss": -0.0055, "num_tokens": 124291310.0, "reward": 0.0, "reward_std": 0.7391352653503418, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03160701879839779, "rewards/wordcountpos_reward/raw_geo/std": 0.11226629892577866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1300.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1134.6875, "completions/mean_terminated_length": 1134.6875, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.5683136627325465, "frac_reward_zero_std": 0.0, "grad_norm": 3.121274133018646, "kl": 0.0206451416015625, "learning_rate": 5.215811913800792e-07, "loss": -0.0217, "num_tokens": 124338129.0, "reward": 7.450580596923828e-09, "reward_std": 1.0458449125289917, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.09470104782422661, "rewards/wordcountpos_reward/raw_geo/std": 0.20337197277549454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1276569477008451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1036.0, "completions/mean_terminated_length": 1036.0, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.5685137027405481, "frac_reward_zero_std": 0.0, "grad_norm": 2.7700853814921933, "kl": 0.0120849609375, "learning_rate": 5.212675964844657e-07, "loss": -0.014, "num_tokens": 124384521.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6996588110923767, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09706625896251406, "rewards/wordcountpos_reward/raw_geo/std": 0.14738220396839447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1024.0, "completions/mean_terminated_length": 1024.0, "completions/min_length": 701.0, "completions/min_terminated_length": 701.0, "epoch": 0.5687137427485497, "frac_reward_zero_std": 0.0, "grad_norm": 3.3868483213225327, "kl": 0.0160675048828125, "learning_rate": 5.209540155989026e-07, "loss": 0.055, "num_tokens": 124423305.0, "reward": -2.2351741790771484e-08, "reward_std": 1.055931806564331, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.053069117961266836, "rewards/wordcountpos_reward/raw_geo/std": 0.09065331318915908, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1112.875, "completions/mean_terminated_length": 1057.571533203125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.5689137827565514, "frac_reward_zero_std": 0.0, "grad_norm": 3.502469475451409, "kl": 0.01812744140625, "learning_rate": 5.206404488762933e-07, "loss": 0.031, "num_tokens": 124469343.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8497471809387207, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0919534365055007, "rewards/wordcountpos_reward/raw_geo/std": 0.1429130738742186, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1102.8125, "completions/mean_terminated_length": 1102.8125, "completions/min_length": 651.0, "completions/min_terminated_length": 651.0, "epoch": 0.5691138227645529, "frac_reward_zero_std": 0.0, "grad_norm": 2.8947913608341187, "kl": 0.017974853515625, "learning_rate": 5.203268964695347e-07, "loss": -0.0129, "num_tokens": 124507020.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9007157683372498, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1131967619457685, "rewards/wordcountpos_reward/raw_geo/std": 0.14903371522116507, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1174.9375, "completions/mean_terminated_length": 1153.2667236328125, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.5693138627725545, "frac_reward_zero_std": 0.0, "grad_norm": 3.1378480385074097, "kl": 0.017242431640625, "learning_rate": 5.200133585315159e-07, "loss": -0.005, "num_tokens": 124540755.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0443532466888428, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13716803415131804, "rewards/wordcountpos_reward/raw_geo/std": 0.1521748780557624, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 925.5625, "completions/mean_terminated_length": 925.5625, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.5695139027805561, "frac_reward_zero_std": 0.0, "grad_norm": 4.063628088603183, "kl": 0.020751953125, "learning_rate": 5.196998352151198e-07, "loss": -0.0087, "num_tokens": 124581060.0, "reward": 0.0, "reward_std": 0.7597005367279053, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05680789319659062, "rewards/wordcountpos_reward/raw_geo/std": 0.12525666111021183, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1128.1875, "completions/mean_terminated_length": 1128.1875, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.5697139427885577, "frac_reward_zero_std": 0.0, "grad_norm": 2.8547378143892703, "kl": 0.017822265625, "learning_rate": 5.193863266732215e-07, "loss": -0.023, "num_tokens": 124620615.0, "reward": 0.0, "reward_std": 0.6318148970603943, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015927365921658474, "rewards/wordcountpos_reward/raw_geo/std": 0.10876411312162038, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1328.875, "completions/mean_terminated_length": 1251.0909423828125, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.5699139827965594, "frac_reward_zero_std": 0.0, "grad_norm": 3.0544603135829758, "kl": 0.0186004638671875, "learning_rate": 5.190728330586894e-07, "loss": -0.0111, "num_tokens": 124675621.0, "reward": -7.450580596923828e-09, "reward_std": 1.021884560585022, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.19278747539531146, "rewards/wordcountpos_reward/raw_geo/std": 0.24624505774156494, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1284.0625, "completions/mean_terminated_length": 1284.0625, "completions/min_length": 988.0, "completions/min_terminated_length": 988.0, "epoch": 0.5701140228045609, "frac_reward_zero_std": 0.0, "grad_norm": 3.4448884455996955, "kl": 0.022918701171875, "learning_rate": 5.187593545243843e-07, "loss": -0.0308, "num_tokens": 124726942.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6151143908500671, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08558050714593178, "rewards/wordcountpos_reward/raw_geo/std": 0.10399368499359656, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1196.375, "completions/mean_terminated_length": 1196.375, "completions/min_length": 1084.0, "completions/min_terminated_length": 1084.0, "epoch": 0.5703140628125625, "frac_reward_zero_std": 0.0, "grad_norm": 2.823824652396261, "kl": 0.0174560546875, "learning_rate": 5.184458912231599e-07, "loss": 0.01, "num_tokens": 124778036.0, "reward": 5.960464477539063e-08, "reward_std": 0.5501927137374878, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0140937907053269, "rewards/wordcountpos_reward/raw_geo/std": 0.18998472891706833, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1236.5, "completions/mean_terminated_length": 1078.4000244140625, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.5705141028205641, "frac_reward_zero_std": 0.0, "grad_norm": 2.638970732765592, "kl": 0.0124969482421875, "learning_rate": 5.181324433078621e-07, "loss": -0.0198, "num_tokens": 124831516.0, "reward": 0.0, "reward_std": 0.8288732767105103, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23257611906615097, "rewards/wordcountpos_reward/raw_geo/std": 0.21383315337693531, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0838870492807861, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 1146.1875, "completions/mean_terminated_length": 1122.60009765625, "completions/min_length": 981.0, "completions/min_terminated_length": 981.0, "epoch": 0.5707141428285657, "frac_reward_zero_std": 0.0, "grad_norm": 3.1655537700735104, "kl": 0.0164031982421875, "learning_rate": 5.178190109313297e-07, "loss": -0.0361, "num_tokens": 124872439.0, "reward": 0.0, "reward_std": 0.8851280808448792, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03595974595029996, "rewards/wordcountpos_reward/raw_geo/std": 0.17711813929946404, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1120.5625, "completions/mean_terminated_length": 1095.2667236328125, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.5709141828365674, "frac_reward_zero_std": 0.0, "grad_norm": 3.221826005455707, "kl": 0.0169677734375, "learning_rate": 5.175055942463937e-07, "loss": 0.0219, "num_tokens": 124917504.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8058701753616333, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05775489695980435, "rewards/wordcountpos_reward/raw_geo/std": 0.050181596477412285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1181.6875, "completions/mean_terminated_length": 1181.6875, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.5711142228445689, "frac_reward_zero_std": 0.0, "grad_norm": 3.375772716388652, "kl": 0.017303466796875, "learning_rate": 5.171921934058775e-07, "loss": -0.0208, "num_tokens": 124966891.0, "reward": -3.5390257835388184e-08, "reward_std": 0.9742732048034668, "rewards/wordcountpos_reward/mean": -3.5390257835388184e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11804814663944939, "rewards/wordcountpos_reward/raw_geo/std": 0.09543011564639496, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1149.0, "completions/max_terminated_length": 1149.0, "completions/mean_length": 1000.5, "completions/mean_terminated_length": 1000.5, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.5713142628525705, "frac_reward_zero_std": 0.0, "grad_norm": 4.081348257482353, "kl": 0.02178955078125, "learning_rate": 5.168788085625968e-07, "loss": -0.026, "num_tokens": 125016307.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0404261350631714, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008369943029344419, "rewards/wordcountpos_reward/raw_geo/std": 0.1808316663268733, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1152.1875, "completions/mean_terminated_length": 1102.5, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.5715143028605721, "frac_reward_zero_std": 0.0, "grad_norm": 3.0035351920265194, "kl": 0.0172882080078125, "learning_rate": 5.165654398693594e-07, "loss": 0.0222, "num_tokens": 125067918.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0011025667190552, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17086690471605642, "rewards/wordcountpos_reward/raw_geo/std": 0.09656064681106347, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1177.625, "completions/mean_terminated_length": 1156.1334228515625, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.5717143428685737, "frac_reward_zero_std": 0.0, "grad_norm": 2.896400356736182, "kl": 0.018890380859375, "learning_rate": 5.162520874789653e-07, "loss": -0.0195, "num_tokens": 125113088.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0558626651763916, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06459163193789193, "rewards/wordcountpos_reward/raw_geo/std": 0.10356718209002547, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0838870492807861, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1045.0625, "completions/mean_terminated_length": 1014.7333984375, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.5719143828765754, "frac_reward_zero_std": 0.0, "grad_norm": 3.6731387935493727, "kl": 0.021942138671875, "learning_rate": 5.159387515442068e-07, "loss": -0.0074, "num_tokens": 125154337.0, "reward": -3.725290298461914e-08, "reward_std": 1.0534989833831787, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16317876420466887, "rewards/wordcountpos_reward/raw_geo/std": 0.15839729854442305, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1058.4375, "completions/mean_terminated_length": 1058.4375, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.5721144228845769, "frac_reward_zero_std": 0.0, "grad_norm": 3.4510040020780584, "kl": 0.0150299072265625, "learning_rate": 5.156254322178675e-07, "loss": -0.014, "num_tokens": 125195168.0, "reward": 0.0, "reward_std": 0.7803539037704468, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03292331905815845, "rewards/wordcountpos_reward/raw_geo/std": 0.06146013200246413, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1355.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 1138.5625, "completions/mean_terminated_length": 1138.5625, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.5723144628925785, "frac_reward_zero_std": 0.0, "grad_norm": 3.1918027695349322, "kl": 0.01776123046875, "learning_rate": 5.153121296527236e-07, "loss": -0.0087, "num_tokens": 125232985.0, "reward": 0.0, "reward_std": 0.8717073798179626, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22090554010779934, "rewards/wordcountpos_reward/raw_geo/std": 0.2648838525137555, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1140.0, "completions/max_terminated_length": 1140.0, "completions/mean_length": 875.25, "completions/mean_terminated_length": 875.25, "completions/min_length": 730.0, "completions/min_terminated_length": 730.0, "epoch": 0.5725145029005801, "frac_reward_zero_std": 0.0, "grad_norm": 3.2145838782186877, "kl": 0.0144500732421875, "learning_rate": 5.149988440015429e-07, "loss": -0.0291, "num_tokens": 125273573.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8761434555053711, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04368179998320379, "rewards/wordcountpos_reward/raw_geo/std": 0.07627527049137012, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1195.6875, "completions/mean_terminated_length": 1175.4000244140625, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.5727145429085817, "frac_reward_zero_std": 0.0, "grad_norm": 3.2402534431394376, "kl": 0.01727294921875, "learning_rate": 5.146855754170846e-07, "loss": 0.0135, "num_tokens": 125312152.0, "reward": 0.0, "reward_std": 0.9516893625259399, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08634148589788869, "rewards/wordcountpos_reward/raw_geo/std": 0.060108640367554485, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1348.9375, "completions/mean_terminated_length": 1197.875, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.5729145829165834, "frac_reward_zero_std": 0.0, "grad_norm": 2.8243137671610956, "kl": 0.015228271484375, "learning_rate": 5.143723240521001e-07, "loss": -0.0452, "num_tokens": 125369071.0, "reward": 0.0, "reward_std": 0.6273353099822998, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0780741468126943, "rewards/wordcountpos_reward/raw_geo/std": 0.08889101350006916, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869923, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1058.25, "completions/mean_terminated_length": 1058.25, "completions/min_length": 616.0, "completions/min_terminated_length": 616.0, "epoch": 0.5731146229245849, "frac_reward_zero_std": 0.0, "grad_norm": 3.7387723555329044, "kl": 0.019866943359375, "learning_rate": 5.140590900593325e-07, "loss": -0.0085, "num_tokens": 125409651.0, "reward": -7.450580596923828e-09, "reward_std": 0.9596825242042542, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.010148271172004958, "rewards/wordcountpos_reward/raw_geo/std": 0.02358513893327917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1128.0, "completions/max_terminated_length": 1128.0, "completions/mean_length": 898.0625, "completions/mean_terminated_length": 898.0625, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.5733146629325865, "frac_reward_zero_std": 0.0, "grad_norm": 3.6430354674169627, "kl": 0.019134521484375, "learning_rate": 5.137458735915154e-07, "loss": -0.0244, "num_tokens": 125447132.0, "reward": 7.450580596923828e-09, "reward_std": 1.0492675304412842, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.13151402339950835, "rewards/wordcountpos_reward/raw_geo/std": 0.12597059379473016, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414601, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1289.6875, "completions/mean_terminated_length": 1259.6429443359375, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.5735147029405882, "frac_reward_zero_std": 0.0, "grad_norm": 2.6087140858918687, "kl": 0.013214111328125, "learning_rate": 5.134326748013752e-07, "loss": 0.0062, "num_tokens": 125497759.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0050699710845947, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04785834750601541, "rewards/wordcountpos_reward/raw_geo/std": 0.09518488305718516, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1140987226857449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1028.6875, "completions/mean_terminated_length": 1028.6875, "completions/min_length": 583.0, "completions/min_terminated_length": 583.0, "epoch": 0.5737147429485897, "frac_reward_zero_std": 0.0, "grad_norm": 3.4131426863350374, "kl": 0.0197601318359375, "learning_rate": 5.131194938416287e-07, "loss": -0.0373, "num_tokens": 125542402.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6077475547790527, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05781313375601872, "rewards/wordcountpos_reward/raw_geo/std": 0.08209843500721524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1177.125, "completions/mean_terminated_length": 1102.615478515625, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.5739147829565913, "frac_reward_zero_std": 0.0, "grad_norm": 2.6206084691800617, "kl": 0.014495849609375, "learning_rate": 5.128063308649846e-07, "loss": 0.0327, "num_tokens": 125579948.0, "reward": -5.960464477539063e-08, "reward_std": 0.724492609500885, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.032583370539300965, "rewards/wordcountpos_reward/raw_geo/std": 0.04197746057166209, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1281.625, "completions/mean_terminated_length": 1231.2308349609375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.5741148229645929, "frac_reward_zero_std": 0.0, "grad_norm": 3.275487366439258, "kl": 0.029693603515625, "learning_rate": 5.124931860241423e-07, "loss": 0.0031, "num_tokens": 125626750.0, "reward": -2.9802322387695312e-08, "reward_std": 0.611338198184967, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3339416104726901, "rewards/wordcountpos_reward/raw_geo/std": 0.35136998425058874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792515, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1212.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 1121.6875, "completions/mean_terminated_length": 1121.6875, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.5743148629725945, "frac_reward_zero_std": 0.0, "grad_norm": 2.9007075378024023, "kl": 0.014984130859375, "learning_rate": 5.12180059471793e-07, "loss": -0.0032, "num_tokens": 125665433.0, "reward": 0.0, "reward_std": 0.8363651037216187, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08637199672893228, "rewards/wordcountpos_reward/raw_geo/std": 0.12210433932684261, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1182.0, "completions/max_terminated_length": 1182.0, "completions/mean_length": 896.3125, "completions/mean_terminated_length": 896.3125, "completions/min_length": 653.0, "completions/min_terminated_length": 653.0, "epoch": 0.5745149029805962, "frac_reward_zero_std": 0.0, "grad_norm": 3.203696012485754, "kl": 0.01666259765625, "learning_rate": 5.118669513606182e-07, "loss": -0.0506, "num_tokens": 125696014.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0356762409210205, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.039010079924559404, "rewards/wordcountpos_reward/raw_geo/std": 0.0587612025193735, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.24821585588175288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 1046.9375, "completions/mean_terminated_length": 1046.9375, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.5747149429885977, "frac_reward_zero_std": 0.0, "grad_norm": 3.8867290009772604, "kl": 0.02001953125, "learning_rate": 5.115538618432912e-07, "loss": 0.0169, "num_tokens": 125750701.0, "reward": 0.0, "reward_std": 0.9097800254821777, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05752674114377659, "rewards/wordcountpos_reward/raw_geo/std": 0.14296613260350402, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1353.25, "completions/mean_terminated_length": 1319.3846435546875, "completions/min_length": 1132.0, "completions/min_terminated_length": 1132.0, "epoch": 0.5749149829965993, "frac_reward_zero_std": 0.0, "grad_norm": 2.975060960673274, "kl": 0.022216796875, "learning_rate": 5.112407910724757e-07, "loss": -0.0063, "num_tokens": 125801313.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8863094449043274, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17218811021058475, "rewards/wordcountpos_reward/raw_geo/std": 0.2932130777675942, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1009.75, "completions/mean_terminated_length": 1009.75, "completions/min_length": 755.0, "completions/min_terminated_length": 755.0, "epoch": 0.5751150230046009, "frac_reward_zero_std": 0.0, "grad_norm": 2.395249367403479, "kl": 0.0135955810546875, "learning_rate": 5.109277392008265e-07, "loss": -0.057, "num_tokens": 125845589.0, "reward": 0.0, "reward_std": 0.9256553053855896, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.060616241654326726, "rewards/wordcountpos_reward/raw_geo/std": 0.09064484032400459, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1278.0, "completions/max_terminated_length": 1278.0, "completions/mean_length": 1022.0, "completions/mean_terminated_length": 1022.0, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.5753150630126025, "frac_reward_zero_std": 0.0, "grad_norm": 2.2590444542529635, "kl": 0.0076141357421875, "learning_rate": 5.106147063809892e-07, "loss": 0.0291, "num_tokens": 125880813.0, "reward": -2.9802322387695312e-08, "reward_std": 0.850509524345398, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03762694036469122, "rewards/wordcountpos_reward/raw_geo/std": 0.11080154089781118, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1333.0, "completions/max_terminated_length": 1333.0, "completions/mean_length": 881.25, "completions/mean_terminated_length": 881.25, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.5755151030206042, "frac_reward_zero_std": 0.0, "grad_norm": 3.2731988435120805, "kl": 0.0143280029296875, "learning_rate": 5.103016927655997e-07, "loss": -0.0486, "num_tokens": 125908945.0, "reward": -2.9802322387695312e-08, "reward_std": 0.949012279510498, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026562933264865356, "rewards/wordcountpos_reward/raw_geo/std": 0.042103187287810366, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1260.625, "completions/mean_terminated_length": 1244.666748046875, "completions/min_length": 1081.0, "completions/min_terminated_length": 1081.0, "epoch": 0.5757151430286057, "frac_reward_zero_std": 0.0, "grad_norm": 2.785943772871435, "kl": 0.013275146484375, "learning_rate": 5.099886985072853e-07, "loss": -0.0141, "num_tokens": 125954891.0, "reward": 2.9802322387695312e-08, "reward_std": 1.036488652229309, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09589427961434424, "rewards/wordcountpos_reward/raw_geo/std": 0.11400802207828194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.06070572613176774, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1052.0, "completions/max_terminated_length": 1052.0, "completions/mean_length": 916.0625, "completions/mean_terminated_length": 916.0625, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.5759151830366073, "frac_reward_zero_std": 0.0, "grad_norm": 3.347214521821343, "kl": 0.0164031982421875, "learning_rate": 5.09675723758663e-07, "loss": -0.0067, "num_tokens": 125991508.0, "reward": 7.450580596923828e-09, "reward_std": 1.0144084692001343, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.028633334415362907, "rewards/wordcountpos_reward/raw_geo/std": 0.11253128410859799, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804347, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1202.3125, "completions/mean_terminated_length": 1202.3125, "completions/min_length": 1044.0, "completions/min_terminated_length": 1044.0, "epoch": 0.5761152230446089, "frac_reward_zero_std": 0.0, "grad_norm": 2.6075079085938166, "kl": 0.01280975341796875, "learning_rate": 5.093627686723411e-07, "loss": 0.0141, "num_tokens": 126033577.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9560786485671997, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03679939040037008, "rewards/wordcountpos_reward/raw_geo/std": 0.0903614714084996, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1177.1875, "completions/mean_terminated_length": 1177.1875, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.5763152630526105, "frac_reward_zero_std": 0.0, "grad_norm": 3.155480768291193, "kl": 0.0182647705078125, "learning_rate": 5.090498334009177e-07, "loss": -0.0483, "num_tokens": 126078572.0, "reward": 0.0, "reward_std": 0.9326386451721191, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18190478033045693, "rewards/wordcountpos_reward/raw_geo/std": 0.1656737457378989, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1235.3125, "completions/mean_terminated_length": 1235.3125, "completions/min_length": 1041.0, "completions/min_terminated_length": 1041.0, "epoch": 0.5765153030606122, "frac_reward_zero_std": 0.0, "grad_norm": 3.2767077950358505, "kl": 0.021636962890625, "learning_rate": 5.087369180969812e-07, "loss": -0.0309, "num_tokens": 126120409.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9428821802139282, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0046293785058308876, "rewards/wordcountpos_reward/raw_geo/std": 0.07605434300877509, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1044.6875, "completions/mean_terminated_length": 1044.6875, "completions/min_length": 567.0, "completions/min_terminated_length": 567.0, "epoch": 0.5767153430686137, "frac_reward_zero_std": 0.0, "grad_norm": 3.6411656246338624, "kl": 0.02734375, "learning_rate": 5.084240229131111e-07, "loss": -0.0727, "num_tokens": 126150900.0, "reward": 0.0, "reward_std": 0.9517310261726379, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.009292390731288173, "rewards/wordcountpos_reward/raw_geo/std": 0.037176427585696066, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1186.25, "completions/mean_terminated_length": 1141.4285888671875, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.5769153830766153, "frac_reward_zero_std": 0.0, "grad_norm": 3.063242102178717, "kl": 0.0194549560546875, "learning_rate": 5.081111480018761e-07, "loss": 0.0008, "num_tokens": 126203880.0, "reward": 0.0, "reward_std": 0.8646323680877686, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07612988241859954, "rewards/wordcountpos_reward/raw_geo/std": 0.1609235930515221, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.14375905768565217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1410.5, "completions/mean_terminated_length": 1369.8182373046875, "completions/min_length": 1206.0, "completions/min_terminated_length": 1206.0, "epoch": 0.5771154230846169, "frac_reward_zero_std": 0.0, "grad_norm": 2.7170779921912094, "kl": 0.0146636962890625, "learning_rate": 5.077982935158357e-07, "loss": -0.01, "num_tokens": 126261008.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6194419264793396, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03577168790919514, "rewards/wordcountpos_reward/raw_geo/std": 0.09562739833954298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1061.3125, "completions/mean_terminated_length": 1061.3125, "completions/min_length": 783.0, "completions/min_terminated_length": 783.0, "epoch": 0.5773154630926185, "frac_reward_zero_std": 0.0, "grad_norm": 3.4502296696962214, "kl": 0.01983642578125, "learning_rate": 5.074854596075388e-07, "loss": 0.0009, "num_tokens": 126308405.0, "reward": 0.0, "reward_std": 0.8748435974121094, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03724898268091636, "rewards/wordcountpos_reward/raw_geo/std": 0.12191606209338662, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1180.3125, "completions/mean_terminated_length": 1159.0, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.5775155031006202, "frac_reward_zero_std": 0.0, "grad_norm": 3.3075877767147284, "kl": 0.022308349609375, "learning_rate": 5.071726464295251e-07, "loss": 0.034, "num_tokens": 126349362.0, "reward": 0.0, "reward_std": 0.5393147468566895, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.27304181204437755, "rewards/wordcountpos_reward/raw_geo/std": 0.5291074572485099, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1225.0, "completions/max_terminated_length": 1225.0, "completions/mean_length": 983.3125, "completions/mean_terminated_length": 983.3125, "completions/min_length": 652.0, "completions/min_terminated_length": 652.0, "epoch": 0.5777155431086217, "frac_reward_zero_std": 0.0, "grad_norm": 2.408418712093975, "kl": 0.0132598876953125, "learning_rate": 5.068598541343234e-07, "loss": -0.0055, "num_tokens": 126393975.0, "reward": 0.0, "reward_std": 0.7851936221122742, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05452327591344851, "rewards/wordcountpos_reward/raw_geo/std": 0.1879731113048992, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1417.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1106.3125, "completions/mean_terminated_length": 1106.3125, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.5779155831166233, "frac_reward_zero_std": 0.0, "grad_norm": 3.0970753206364905, "kl": 0.014801025390625, "learning_rate": 5.065470828744525e-07, "loss": -0.0111, "num_tokens": 126443444.0, "reward": 0.0, "reward_std": 0.7781053185462952, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1895868770342986, "rewards/wordcountpos_reward/raw_geo/std": 0.19604032115914047, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1134476547592341, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1135.8125, "completions/mean_terminated_length": 1083.7857666015625, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.5781156231246249, "frac_reward_zero_std": 0.0, "grad_norm": 3.651713681445246, "kl": 0.019866943359375, "learning_rate": 5.062343328024216e-07, "loss": -0.0266, "num_tokens": 126495633.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0185843706130981, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1814690430891859, "rewards/wordcountpos_reward/raw_geo/std": 0.1173928708041594, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1022.8125, "completions/mean_terminated_length": 991.0000610351562, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.5783156631326265, "frac_reward_zero_std": 0.0, "grad_norm": 3.8199233296621538, "kl": 0.02154541015625, "learning_rate": 5.059216040707284e-07, "loss": 0.0003, "num_tokens": 126536702.0, "reward": 0.0, "reward_std": 0.7121326923370361, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08319988153070458, "rewards/wordcountpos_reward/raw_geo/std": 0.09148993961967797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.16187558093703852, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1072.1875, "completions/mean_terminated_length": 1072.1875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.5785157031406282, "frac_reward_zero_std": 0.0, "grad_norm": 3.6277954308785363, "kl": 0.0172882080078125, "learning_rate": 5.056088968318616e-07, "loss": 0.0326, "num_tokens": 126571209.0, "reward": 4.470348358154297e-08, "reward_std": 1.0366958379745483, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.007466489344815847, "rewards/wordcountpos_reward/raw_geo/std": 0.026809298310598514, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1196.875, "completions/mean_terminated_length": 893.75, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.5787157431486297, "frac_reward_zero_std": 0.0, "grad_norm": 2.9081149833695634, "kl": 0.0167083740234375, "learning_rate": 5.05296211238298e-07, "loss": -0.0328, "num_tokens": 126627599.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9521926045417786, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10236683392868082, "rewards/wordcountpos_reward/raw_geo/std": 0.08768413039672335, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1164283279771532, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1076.0625, "completions/mean_terminated_length": 1015.5000610351562, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.5789157831566313, "frac_reward_zero_std": 0.0, "grad_norm": 2.7675812303636573, "kl": 0.017822265625, "learning_rate": 5.049835474425049e-07, "loss": -0.1526, "num_tokens": 126674160.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9649653434753418, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08421504980933846, "rewards/wordcountpos_reward/raw_geo/std": 0.23617953922686713, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.18089284734953515, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 954.0, "completions/mean_terminated_length": 917.6000366210938, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.579115823164633, "frac_reward_zero_std": 0.0, "grad_norm": 3.581927721114051, "kl": 0.019256591796875, "learning_rate": 5.046709055969383e-07, "loss": 0.0055, "num_tokens": 126717464.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8534067869186401, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08369160899370734, "rewards/wordcountpos_reward/raw_geo/std": 0.25442349612385085, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.19321835661585918, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 1093.5, "completions/mean_terminated_length": 1066.4000244140625, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.5793158631726345, "frac_reward_zero_std": 0.0, "grad_norm": 3.3975110365527583, "kl": 0.0163116455078125, "learning_rate": 5.043582858540443e-07, "loss": 0.0242, "num_tokens": 126748776.0, "reward": 0.0, "reward_std": 0.9173611998558044, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04922554382464631, "rewards/wordcountpos_reward/raw_geo/std": 0.08088777471701823, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982526, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1177.375, "completions/mean_terminated_length": 1155.86669921875, "completions/min_length": 518.0, "completions/min_terminated_length": 518.0, "epoch": 0.5795159031806362, "frac_reward_zero_std": 0.0, "grad_norm": 2.7776569001870817, "kl": 0.0145416259765625, "learning_rate": 5.040456883662571e-07, "loss": -0.0213, "num_tokens": 126801758.0, "reward": 0.0, "reward_std": 0.9295987486839294, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.002495700999150998, "rewards/wordcountpos_reward/raw_geo/std": 0.1382260255402544, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1086.0, "completions/max_terminated_length": 1086.0, "completions/mean_length": 840.6875, "completions/mean_terminated_length": 840.6875, "completions/min_length": 565.0, "completions/min_terminated_length": 565.0, "epoch": 0.5797159431886377, "frac_reward_zero_std": 0.0, "grad_norm": 3.8406063869053555, "kl": 0.017303466796875, "learning_rate": 5.037331132860013e-07, "loss": -0.0716, "num_tokens": 126827361.0, "reward": 0.0, "reward_std": 0.6080427169799805, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015201405051275523, "rewards/wordcountpos_reward/raw_geo/std": 0.08803067037404982, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1163.0, "completions/max_terminated_length": 1163.0, "completions/mean_length": 898.375, "completions/mean_terminated_length": 898.375, "completions/min_length": 696.0, "completions/min_terminated_length": 696.0, "epoch": 0.5799159831966393, "frac_reward_zero_std": 0.0, "grad_norm": 3.4955189760956134, "kl": 0.016937255859375, "learning_rate": 5.034205607656892e-07, "loss": 0.0335, "num_tokens": 126855935.0, "reward": 0.0, "reward_std": 0.4408969581127167, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04657463689088292, "rewards/wordcountpos_reward/raw_geo/std": 0.19896410930927322, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043478, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1121.4375, "completions/mean_terminated_length": 1121.4375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.580116023204641, "frac_reward_zero_std": 0.0, "grad_norm": 2.7471785589731064, "kl": 0.017578125, "learning_rate": 5.031080309577232e-07, "loss": -0.0065, "num_tokens": 126898878.0, "reward": 0.0, "reward_std": 0.558305025100708, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04425015185646269, "rewards/wordcountpos_reward/raw_geo/std": 0.24867707349347132, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1196.125, "completions/mean_terminated_length": 1175.86669921875, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.5803160632126425, "frac_reward_zero_std": 0.0, "grad_norm": 2.818556210370417, "kl": 0.012969970703125, "learning_rate": 5.027955240144944e-07, "loss": 0.0148, "num_tokens": 126940216.0, "reward": 0.0, "reward_std": 0.49796804785728455, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11166088164325326, "rewards/wordcountpos_reward/raw_geo/std": 0.11168431905756729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 1080.625, "completions/mean_terminated_length": 1080.625, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.5805161032206442, "frac_reward_zero_std": 0.0, "grad_norm": 3.304548829127565, "kl": 0.0182037353515625, "learning_rate": 5.024830400883823e-07, "loss": -0.0028, "num_tokens": 126972962.0, "reward": 0.0, "reward_std": 0.9358429312705994, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03069143177137798, "rewards/wordcountpos_reward/raw_geo/std": 0.03882444818622347, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.21221931353445905, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1241.625, "completions/mean_terminated_length": 1086.5999755859375, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.5807161432286457, "frac_reward_zero_std": 0.0, "grad_norm": 2.5297916177907056, "kl": 0.019989013671875, "learning_rate": 5.021705793317556e-07, "loss": 0.0255, "num_tokens": 127027228.0, "reward": 1.862645149230957e-08, "reward_std": 1.050182819366455, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12872624888761058, "rewards/wordcountpos_reward/raw_geo/std": 0.10965940480902837, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.14343665526661611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 1092.1875, "completions/mean_terminated_length": 1065.0, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.5809161832366473, "frac_reward_zero_std": 0.0, "grad_norm": 3.7230026125777935, "kl": 0.022186279296875, "learning_rate": 5.018581418969719e-07, "loss": -0.0255, "num_tokens": 127070327.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6065070629119873, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07965046583719053, "rewards/wordcountpos_reward/raw_geo/std": 0.07912014045875158, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16815997674172586, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1038.6875, "completions/mean_terminated_length": 1038.6875, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.581116223244649, "frac_reward_zero_std": 0.0, "grad_norm": 3.8856640774569087, "kl": 0.020294189453125, "learning_rate": 5.015457279363766e-07, "loss": -0.0544, "num_tokens": 127122802.0, "reward": 3.725290298461914e-08, "reward_std": 1.0349830389022827, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.023688615058893532, "rewards/wordcountpos_reward/raw_geo/std": 0.039610859524090755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1113.4375, "completions/mean_terminated_length": 1113.4375, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.5813162632526505, "frac_reward_zero_std": 0.0, "grad_norm": 3.407178829781519, "kl": 0.020599365234375, "learning_rate": 5.012333376023044e-07, "loss": 0.0096, "num_tokens": 127168457.0, "reward": 0.0, "reward_std": 0.594805121421814, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17048764860652083, "rewards/wordcountpos_reward/raw_geo/std": 0.15891902137861985, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1450.25, "completions/mean_terminated_length": 1400.5, "completions/min_length": 1220.0, "completions/min_terminated_length": 1220.0, "epoch": 0.5815163032606522, "frac_reward_zero_std": 0.0, "grad_norm": 2.428458939658596, "kl": 0.0147552490234375, "learning_rate": 5.009209710470781e-07, "loss": 0.0115, "num_tokens": 127230101.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0004221200942993, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15472147058500874, "rewards/wordcountpos_reward/raw_geo/std": 0.06549666413119719, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1492.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1178.0, "completions/mean_terminated_length": 1178.0, "completions/min_length": 947.0, "completions/min_terminated_length": 947.0, "epoch": 0.5817163432686537, "frac_reward_zero_std": 0.0, "grad_norm": 2.448289320168535, "kl": 0.01336669921875, "learning_rate": 5.006086284230094e-07, "loss": 0.0303, "num_tokens": 127278941.0, "reward": 0.0, "reward_std": 0.5355027318000793, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2507746735342252, "rewards/wordcountpos_reward/raw_geo/std": 0.07824642096173384, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1403.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1076.125, "completions/mean_terminated_length": 1076.125, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.5819163832766553, "frac_reward_zero_std": 0.0, "grad_norm": 2.771859082600216, "kl": 0.0163116455078125, "learning_rate": 5.002963098823976e-07, "loss": 0.0353, "num_tokens": 127319271.0, "reward": 0.0, "reward_std": 0.723395049571991, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0425015081245416, "rewards/wordcountpos_reward/raw_geo/std": 0.08411680697482908, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1234.75, "completions/mean_terminated_length": 1217.0667724609375, "completions/min_length": 1053.0, "completions/min_terminated_length": 1053.0, "epoch": 0.582116423284657, "frac_reward_zero_std": 0.0, "grad_norm": 3.309826721151294, "kl": 0.023162841796875, "learning_rate": 4.999840155775309e-07, "loss": 0.0164, "num_tokens": 127353755.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8859012126922607, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0027152589559524907, "rewards/wordcountpos_reward/raw_geo/std": 0.08427367127828711, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504501, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1191.9375, "completions/mean_terminated_length": 1191.9375, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.5823164632926585, "frac_reward_zero_std": 0.0, "grad_norm": 3.0858151399487275, "kl": 0.0176544189453125, "learning_rate": 4.996717456606852e-07, "loss": -0.0726, "num_tokens": 127403570.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0447947978973389, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06693125617801163, "rewards/wordcountpos_reward/raw_geo/std": 0.1598868399941056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1076.0625, "completions/mean_terminated_length": 1076.0625, "completions/min_length": 541.0, "completions/min_terminated_length": 541.0, "epoch": 0.5825165033006602, "frac_reward_zero_std": 0.0, "grad_norm": 3.611206044738789, "kl": 0.02410888671875, "learning_rate": 4.99359500284125e-07, "loss": 0.032, "num_tokens": 127457275.0, "reward": 0.0, "reward_std": 0.9942271709442139, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12332546247899819, "rewards/wordcountpos_reward/raw_geo/std": 0.21657637721778744, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563383, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1108.0, "completions/max_terminated_length": 1108.0, "completions/mean_length": 903.9375, "completions/mean_terminated_length": 903.9375, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.5827165433086617, "frac_reward_zero_std": 0.0, "grad_norm": 2.887952638561367, "kl": 0.01409912109375, "learning_rate": 4.990472796001023e-07, "loss": -0.0195, "num_tokens": 127483298.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9166451096534729, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04605330893522248, "rewards/wordcountpos_reward/raw_geo/std": 0.033399754502156404, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 977.0625, "completions/mean_terminated_length": 977.0625, "completions/min_length": 716.0, "completions/min_terminated_length": 716.0, "epoch": 0.5829165833166633, "frac_reward_zero_std": 0.0, "grad_norm": 3.4679122213496534, "kl": 0.014678955078125, "learning_rate": 4.987350837608576e-07, "loss": 0.0071, "num_tokens": 127517667.0, "reward": 2.9802322387695312e-08, "reward_std": 0.569913387298584, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15826540701211628, "rewards/wordcountpos_reward/raw_geo/std": 0.15492293299343166, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1095.875, "completions/mean_terminated_length": 1068.933349609375, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.583116623324665, "frac_reward_zero_std": 0.0, "grad_norm": 2.948936100351661, "kl": 0.012298583984375, "learning_rate": 4.984229129186187e-07, "loss": -0.0356, "num_tokens": 127560249.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8583228588104248, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03202725579360621, "rewards/wordcountpos_reward/raw_geo/std": 0.2973459265297517, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1062.5, "completions/mean_terminated_length": 1062.5, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.5833166633326665, "frac_reward_zero_std": 0.0, "grad_norm": 3.171303688144934, "kl": 0.0145111083984375, "learning_rate": 4.981107672256018e-07, "loss": 0.0575, "num_tokens": 127597369.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0630252361297607, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03212077726949154, "rewards/wordcountpos_reward/raw_geo/std": 0.10804365630939668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 1014.4375, "completions/mean_terminated_length": 1014.4375, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.5835167033406682, "frac_reward_zero_std": 0.0, "grad_norm": 2.370324298716614, "kl": 0.01312255859375, "learning_rate": 4.977986468340102e-07, "loss": -0.0297, "num_tokens": 127630312.0, "reward": 0.0, "reward_std": 0.876937747001648, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.008789455659600005, "rewards/wordcountpos_reward/raw_geo/std": 0.12215068146355017, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327549, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1101.5625, "completions/mean_terminated_length": 1009.6154174804688, "completions/min_length": 574.0, "completions/min_terminated_length": 574.0, "epoch": 0.5837167433486697, "frac_reward_zero_std": 0.0, "grad_norm": 3.6204837335669233, "kl": 0.019439697265625, "learning_rate": 4.974865518960356e-07, "loss": -0.0488, "num_tokens": 127677889.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0662357807159424, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02816228629689778, "rewards/wordcountpos_reward/raw_geo/std": 0.07472683510793504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1097.8125, "completions/mean_terminated_length": 1097.8125, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.5839167833566713, "frac_reward_zero_std": 0.0, "grad_norm": 3.6974600759297744, "kl": 0.022247314453125, "learning_rate": 4.971744825638567e-07, "loss": -0.0334, "num_tokens": 127721062.0, "reward": 0.0, "reward_std": 0.4363172650337219, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07848332964177085, "rewards/wordcountpos_reward/raw_geo/std": 0.1624090759842044, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901862, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 1111.25, "completions/mean_terminated_length": 1111.25, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.584116823364673, "frac_reward_zero_std": 0.0, "grad_norm": 3.7459806954778143, "kl": 0.0179595947265625, "learning_rate": 4.968624389896401e-07, "loss": -0.0048, "num_tokens": 127767650.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9427498579025269, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012430928055407981, "rewards/wordcountpos_reward/raw_geo/std": 0.390696574429716, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1305.125, "completions/mean_terminated_length": 1292.1334228515625, "completions/min_length": 1121.0, "completions/min_terminated_length": 1121.0, "epoch": 0.5843168633726745, "frac_reward_zero_std": 0.0, "grad_norm": 2.323720699247139, "kl": 0.0117340087890625, "learning_rate": 4.965504213255394e-07, "loss": -0.007, "num_tokens": 127817268.0, "reward": -7.450580596923828e-09, "reward_std": 1.0236223936080933, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.010449152202210392, "rewards/wordcountpos_reward/raw_geo/std": 0.09854105556743352, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 1105.6875, "completions/mean_terminated_length": 1105.6875, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.5845169033806762, "frac_reward_zero_std": 0.0, "grad_norm": 3.0208018877105354, "kl": 0.0178985595703125, "learning_rate": 4.96238429723696e-07, "loss": -0.0139, "num_tokens": 127862247.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7607231140136719, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2593789827874581, "rewards/wordcountpos_reward/raw_geo/std": 0.1136588806586169, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1281.8125, "completions/mean_terminated_length": 1267.2667236328125, "completions/min_length": 1117.0, "completions/min_terminated_length": 1117.0, "epoch": 0.5847169433886777, "frac_reward_zero_std": 0.0, "grad_norm": 2.5734594748302513, "kl": 0.01263427734375, "learning_rate": 4.959264643362384e-07, "loss": 0.0098, "num_tokens": 127906964.0, "reward": -1.4901161193847656e-08, "reward_std": 0.983154296875, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05362749789916609, "rewards/wordcountpos_reward/raw_geo/std": 0.13959322793248535, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1173.625, "completions/mean_terminated_length": 1173.625, "completions/min_length": 1031.0, "completions/min_terminated_length": 1031.0, "epoch": 0.5849169833966793, "frac_reward_zero_std": 0.0, "grad_norm": 2.771102074078157, "kl": 0.0154876708984375, "learning_rate": 4.956145253152823e-07, "loss": -0.0081, "num_tokens": 127955966.0, "reward": 0.0, "reward_std": 0.7952161431312561, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14138372358241064, "rewards/wordcountpos_reward/raw_geo/std": 0.126808028919442, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1331.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1132.125, "completions/mean_terminated_length": 1132.125, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.585117023404681, "frac_reward_zero_std": 0.0, "grad_norm": 2.5495735909837483, "kl": 0.013427734375, "learning_rate": 4.953026128129308e-07, "loss": 0.0195, "num_tokens": 128004536.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8520119786262512, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17163856589676968, "rewards/wordcountpos_reward/raw_geo/std": 0.23352564139145396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1136.25, "completions/mean_terminated_length": 1136.25, "completions/min_length": 769.0, "completions/min_terminated_length": 769.0, "epoch": 0.5853170634126825, "frac_reward_zero_std": 0.0, "grad_norm": 3.5768484869347144, "kl": 0.020904541015625, "learning_rate": 4.949907269812735e-07, "loss": 0.0015, "num_tokens": 128045476.0, "reward": 0.0, "reward_std": 0.8267281651496887, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.045567736499576794, "rewards/wordcountpos_reward/raw_geo/std": 0.08794986215706542, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1241.5625, "completions/mean_terminated_length": 1181.923095703125, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.5855171034206841, "frac_reward_zero_std": 0.0, "grad_norm": 3.3179390201143963, "kl": 0.0194091796875, "learning_rate": 4.946788679723875e-07, "loss": -0.005, "num_tokens": 128089397.0, "reward": 0.0, "reward_std": 0.8485996127128601, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14018339837224825, "rewards/wordcountpos_reward/raw_geo/std": 0.15885014211128837, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1014.625, "completions/mean_terminated_length": 1014.625, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.5857171434286857, "frac_reward_zero_std": 0.0, "grad_norm": 3.7445293150288754, "kl": 0.0242919921875, "learning_rate": 4.943670359383368e-07, "loss": -0.0384, "num_tokens": 128126655.0, "reward": 5.960464477539063e-08, "reward_std": 0.7084122896194458, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0007603556722554296, "rewards/wordcountpos_reward/raw_geo/std": 0.07854840777415577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1376.75, "completions/mean_terminated_length": 1253.5, "completions/min_length": 1124.0, "completions/min_terminated_length": 1124.0, "epoch": 0.5859171834366873, "frac_reward_zero_std": 0.0, "grad_norm": 2.635408349279786, "kl": 0.0166778564453125, "learning_rate": 4.940552310311718e-07, "loss": 0.0108, "num_tokens": 128185771.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8104726076126099, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06461988754122869, "rewards/wordcountpos_reward/raw_geo/std": 0.11081721572312252, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 984.8125, "completions/mean_terminated_length": 950.4667358398438, "completions/min_length": 762.0, "completions/min_terminated_length": 762.0, "epoch": 0.586117223444689, "frac_reward_zero_std": 0.0, "grad_norm": 3.9307948933074996, "kl": 0.024505615234375, "learning_rate": 4.937434534029304e-07, "loss": -0.0069, "num_tokens": 128225856.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9545694589614868, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03835541809408087, "rewards/wordcountpos_reward/raw_geo/std": 0.0879647147313885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.2652392351290658, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1239.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 967.125, "completions/mean_terminated_length": 967.125, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.5863172634526905, "frac_reward_zero_std": 0.0, "grad_norm": 2.3820739669628255, "kl": 0.00995635986328125, "learning_rate": 4.934317032056365e-07, "loss": -0.0355, "num_tokens": 128259034.0, "reward": 0.0, "reward_std": 0.9153488874435425, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06986500133936903, "rewards/wordcountpos_reward/raw_geo/std": 0.10804558186404627, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1330552655993129, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1184.375, "completions/mean_terminated_length": 1139.2857666015625, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.5865173034606921, "frac_reward_zero_std": 0.0, "grad_norm": 3.13425801636311, "kl": 0.016876220703125, "learning_rate": 4.931199805913011e-07, "loss": 0.0192, "num_tokens": 128299136.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9250167608261108, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0064987683655425305, "rewards/wordcountpos_reward/raw_geo/std": 0.12204582564301925, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1232.75, "completions/mean_terminated_length": 1214.933349609375, "completions/min_length": 1033.0, "completions/min_terminated_length": 1033.0, "epoch": 0.5867173434686938, "frac_reward_zero_std": 0.0, "grad_norm": 3.403565217563238, "kl": 0.019500732421875, "learning_rate": 4.928082857119212e-07, "loss": -0.0219, "num_tokens": 128347484.0, "reward": -2.9802322387695312e-08, "reward_std": 0.576077938079834, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13361207450047827, "rewards/wordcountpos_reward/raw_geo/std": 0.2691332306878183, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460883, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1292.8125, "completions/mean_terminated_length": 1245.0, "completions/min_length": 1077.0, "completions/min_terminated_length": 1077.0, "epoch": 0.5869173834766953, "frac_reward_zero_std": 0.0, "grad_norm": 3.561115609640162, "kl": 0.021942138671875, "learning_rate": 4.924966187194811e-07, "loss": -0.0133, "num_tokens": 128398441.0, "reward": 0.0, "reward_std": 0.5309121608734131, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.24032194998045364, "rewards/wordcountpos_reward/raw_geo/std": 0.25595983398822897, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 986.75, "completions/mean_terminated_length": 986.75, "completions/min_length": 721.0, "completions/min_terminated_length": 721.0, "epoch": 0.587117423484697, "frac_reward_zero_std": 0.0, "grad_norm": 2.9574750734271444, "kl": 0.018646240234375, "learning_rate": 4.921849797659508e-07, "loss": -0.0087, "num_tokens": 128440549.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9660455584526062, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04749196412947569, "rewards/wordcountpos_reward/raw_geo/std": 0.152198356867434, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414601, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1293.0, "completions/mean_length": 1245.0625, "completions/mean_terminated_length": 1092.0999755859375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.5873174634926985, "frac_reward_zero_std": 0.0, "grad_norm": 2.8610927713631273, "kl": 0.01715087890625, "learning_rate": 4.918733690032868e-07, "loss": -0.0309, "num_tokens": 128494550.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9418723583221436, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04693082857998903, "rewards/wordcountpos_reward/raw_geo/std": 0.07612171631501917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1161.0, "completions/mean_terminated_length": 1138.4000244140625, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.5875175035007001, "frac_reward_zero_std": 0.0, "grad_norm": 3.454920343692838, "kl": 0.02069091796875, "learning_rate": 4.915617865834319e-07, "loss": -0.025, "num_tokens": 128533990.0, "reward": 0.0, "reward_std": 0.6108978986740112, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.20893634462182237, "rewards/wordcountpos_reward/raw_geo/std": 0.18526855110056076, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1087.875, "completions/mean_terminated_length": 1087.875, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.5877175435087018, "frac_reward_zero_std": 0.0, "grad_norm": 3.7496391945356726, "kl": 0.0262451171875, "learning_rate": 4.912502326583157e-07, "loss": -0.0697, "num_tokens": 128585756.0, "reward": 0.0, "reward_std": 1.0486235618591309, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13307876063289864, "rewards/wordcountpos_reward/raw_geo/std": 0.15528996027103104, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116195, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1192.1875, "completions/mean_terminated_length": 1121.1539306640625, "completions/min_length": 692.0, "completions/min_terminated_length": 692.0, "epoch": 0.5879175835167033, "frac_reward_zero_std": 0.0, "grad_norm": 2.804337233698431, "kl": 0.023681640625, "learning_rate": 4.909387073798523e-07, "loss": -0.0285, "num_tokens": 128626063.0, "reward": 0.0, "reward_std": 0.7643004655838013, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07571619841295281, "rewards/wordcountpos_reward/raw_geo/std": 0.05688560305969133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1189.4375, "completions/mean_terminated_length": 1189.4375, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.588117623524705, "frac_reward_zero_std": 0.0, "grad_norm": 2.316996932255133, "kl": 0.0119781494140625, "learning_rate": 4.906272108999437e-07, "loss": -0.0248, "num_tokens": 128670966.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8999022841453552, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05130192820363911, "rewards/wordcountpos_reward/raw_geo/std": 0.12661885206495976, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.054262735320332364, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1120.625, "completions/mean_terminated_length": 1095.3333740234375, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.5883176635327065, "frac_reward_zero_std": 0.0, "grad_norm": 3.578191391979515, "kl": 0.02203369140625, "learning_rate": 4.903157433704767e-07, "loss": -0.0279, "num_tokens": 128720880.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8246440291404724, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08864118456500737, "rewards/wordcountpos_reward/raw_geo/std": 0.15574157404583677, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852978, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1133.6875, "completions/mean_terminated_length": 1109.2667236328125, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.5885177035407081, "frac_reward_zero_std": 0.0, "grad_norm": 2.907184937687243, "kl": 0.014312744140625, "learning_rate": 4.900043049433241e-07, "loss": 0.004, "num_tokens": 128758627.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5109232664108276, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08446188308699835, "rewards/wordcountpos_reward/raw_geo/std": 0.09360595750109865, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1302.625, "completions/mean_terminated_length": 1257.0770263671875, "completions/min_length": 1146.0, "completions/min_terminated_length": 1146.0, "epoch": 0.5887177435487098, "frac_reward_zero_std": 0.0, "grad_norm": 2.6676124672261077, "kl": 0.0148773193359375, "learning_rate": 4.896928957703449e-07, "loss": -0.0096, "num_tokens": 128812189.0, "reward": 0.0, "reward_std": 0.5974677801132202, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04285772132860794, "rewards/wordcountpos_reward/raw_geo/std": 0.11287068270546147, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387146, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1197.5625, "completions/mean_terminated_length": 1096.75, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.5889177835567113, "frac_reward_zero_std": 0.0, "grad_norm": 2.912012638923016, "kl": 0.014373779296875, "learning_rate": 4.893815160033834e-07, "loss": -0.0252, "num_tokens": 128850926.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0351181030273438, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009667762578107296, "rewards/wordcountpos_reward/raw_geo/std": 0.05004809211439127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1241.625, "completions/mean_terminated_length": 1155.5, "completions/min_length": 806.0, "completions/min_terminated_length": 806.0, "epoch": 0.589117823564713, "frac_reward_zero_std": 0.0, "grad_norm": 2.884235417875095, "kl": 0.01629638671875, "learning_rate": 4.890701657942703e-07, "loss": -0.0031, "num_tokens": 128904384.0, "reward": 0.0, "reward_std": 0.7180237174034119, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06865530237105563, "rewards/wordcountpos_reward/raw_geo/std": 0.16128351689483508, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1368.5, "completions/mean_terminated_length": 1338.1539306640625, "completions/min_length": 1157.0, "completions/min_terminated_length": 1157.0, "epoch": 0.5893178635727145, "frac_reward_zero_std": 0.0, "grad_norm": 2.41507973954685, "kl": 0.0142669677734375, "learning_rate": 4.887588452948207e-07, "loss": -0.0205, "num_tokens": 128946184.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7883151173591614, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04497441270871488, "rewards/wordcountpos_reward/raw_geo/std": 0.07446201374375123, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1217.25, "completions/mean_terminated_length": 1123.0, "completions/min_length": 855.0, "completions/min_terminated_length": 855.0, "epoch": 0.5895179035807161, "frac_reward_zero_std": 0.0, "grad_norm": 3.039112290167763, "kl": 0.018402099609375, "learning_rate": 4.884475546568365e-07, "loss": -0.017, "num_tokens": 128993620.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7988297939300537, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10738163772031713, "rewards/wordcountpos_reward/raw_geo/std": 0.07226108961539644, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 896.5, "completions/mean_terminated_length": 896.5, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.5897179435887178, "frac_reward_zero_std": 0.0, "grad_norm": 2.422445540037207, "kl": 0.01116943359375, "learning_rate": 4.881362940321039e-07, "loss": -0.0331, "num_tokens": 129026892.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9787921905517578, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.041536564587875566, "rewards/wordcountpos_reward/raw_geo/std": 0.05127293820689761, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1157.0, "completions/max_terminated_length": 1157.0, "completions/mean_length": 1017.0625, "completions/mean_terminated_length": 1017.0625, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.5899179835967193, "frac_reward_zero_std": 0.0, "grad_norm": 3.340727771962041, "kl": 0.0166473388671875, "learning_rate": 4.878250635723954e-07, "loss": -0.0056, "num_tokens": 129073093.0, "reward": 0.0, "reward_std": 1.0484566688537598, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010931921489157087, "rewards/wordcountpos_reward/raw_geo/std": 0.06924733394943244, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 1197.6875, "completions/mean_terminated_length": 1177.533447265625, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.590118023604721, "frac_reward_zero_std": 0.0, "grad_norm": 2.469104294880385, "kl": 0.0104217529296875, "learning_rate": 4.875138634294681e-07, "loss": -0.0082, "num_tokens": 129112256.0, "reward": 1.6763806343078613e-08, "reward_std": 1.0622138977050781, "rewards/wordcountpos_reward/mean": 1.6763806343078613e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.053127129666433286, "rewards/wordcountpos_reward/raw_geo/std": 0.07673082997012118, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1434.125, "completions/mean_terminated_length": 1349.4285888671875, "completions/min_length": 1200.0, "completions/min_terminated_length": 1200.0, "epoch": 0.5903180636127225, "frac_reward_zero_std": 0.0, "grad_norm": 1.9487251657376934, "kl": 0.0128936767578125, "learning_rate": 4.87202693755065e-07, "loss": -0.0135, "num_tokens": 129162890.0, "reward": 5.960464477539063e-08, "reward_std": 0.26032984256744385, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11093943048806376, "rewards/wordcountpos_reward/raw_geo/std": 0.25172680744456916, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1133.0625, "completions/mean_terminated_length": 1133.0625, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.5905181036207241, "frac_reward_zero_std": 0.0, "grad_norm": 3.401551665145048, "kl": 0.021484375, "learning_rate": 4.868915547009132e-07, "loss": 0.001, "num_tokens": 129206883.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0114874839782715, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.061259220669361715, "rewards/wordcountpos_reward/raw_geo/std": 0.10383206137664296, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1197.8125, "completions/mean_terminated_length": 1197.8125, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.5907181436287258, "frac_reward_zero_std": 0.0, "grad_norm": 3.35796090024454, "kl": 0.0172882080078125, "learning_rate": 4.865804464187263e-07, "loss": 0.0273, "num_tokens": 129254072.0, "reward": 0.0, "reward_std": 0.9819974899291992, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05535672024604884, "rewards/wordcountpos_reward/raw_geo/std": 0.11233620775011013, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1217.375, "completions/mean_terminated_length": 1198.533447265625, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.5909181836367273, "frac_reward_zero_std": 0.0, "grad_norm": 2.985857638960203, "kl": 0.0221099853515625, "learning_rate": 4.862693690602015e-07, "loss": 0.0047, "num_tokens": 129307222.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4867950975894928, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09114024182044048, "rewards/wordcountpos_reward/raw_geo/std": 0.11107968050946687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13984117975602023, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1007.625, "completions/mean_terminated_length": 1007.625, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.591118223644729, "frac_reward_zero_std": 0.0, "grad_norm": 3.7859780698198, "kl": 0.01885986328125, "learning_rate": 4.859583227770217e-07, "loss": -0.0269, "num_tokens": 129357504.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0038504600524902, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0793420143720521, "rewards/wordcountpos_reward/raw_geo/std": 0.09194297019918528, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1149.6875, "completions/mean_terminated_length": 1149.6875, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.5913182636527305, "frac_reward_zero_std": 0.0, "grad_norm": 3.477590522364118, "kl": 0.021820068359375, "learning_rate": 4.856473077208546e-07, "loss": -0.0065, "num_tokens": 129401443.0, "reward": 0.0, "reward_std": 0.9997801184654236, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.002869175714485117, "rewards/wordcountpos_reward/raw_geo/std": 0.07125379061629099, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1121.0, "completions/mean_terminated_length": 1121.0, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.5915183036607321, "frac_reward_zero_std": 0.0, "grad_norm": 3.6446304573248742, "kl": 0.02227783203125, "learning_rate": 4.853363240433524e-07, "loss": -0.0302, "num_tokens": 129442627.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7032204866409302, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.061409318837461715, "rewards/wordcountpos_reward/raw_geo/std": 0.0750741847301771, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 2957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1326.8125, "completions/mean_terminated_length": 1269.0833740234375, "completions/min_length": 1072.0, "completions/min_terminated_length": 1072.0, "epoch": 0.5917183436687338, "frac_reward_zero_std": 0.0, "grad_norm": 2.5883280058619733, "kl": 0.012969970703125, "learning_rate": 4.850253718961523e-07, "loss": 0.002, "num_tokens": 129486376.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9953677654266357, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12158746193252383, "rewards/wordcountpos_reward/raw_geo/std": 0.1205034581290265, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1290.25, "completions/mean_terminated_length": 1276.2667236328125, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.5919183836767353, "frac_reward_zero_std": 0.0, "grad_norm": 3.370613279951624, "kl": 0.0196533203125, "learning_rate": 4.847144514308759e-07, "loss": 0.0033, "num_tokens": 129533348.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9424846172332764, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.017306315783704663, "rewards/wordcountpos_reward/raw_geo/std": 0.08323158195728211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1125.25, "completions/mean_terminated_length": 1125.25, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.592118423684737, "frac_reward_zero_std": 0.0, "grad_norm": 3.4170659859988133, "kl": 0.021942138671875, "learning_rate": 4.844035627991294e-07, "loss": -0.0208, "num_tokens": 129575864.0, "reward": 2.9802322387695312e-08, "reward_std": 0.39485231041908264, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07166847957333539, "rewards/wordcountpos_reward/raw_geo/std": 0.12929242902072594, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387149, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1100.5, "completions/mean_terminated_length": 1100.5, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.5923184636927386, "frac_reward_zero_std": 0.0, "grad_norm": 3.4773777025689694, "kl": 0.0205078125, "learning_rate": 4.840927061525037e-07, "loss": 0.0173, "num_tokens": 129612800.0, "reward": 1.4901161193847656e-08, "reward_std": 1.013267159461975, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.049945132278359075, "rewards/wordcountpos_reward/raw_geo/std": 0.05444259074092291, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1328.0, "completions/mean_length": 1078.125, "completions/mean_terminated_length": 1050.0, "completions/min_length": 695.0, "completions/min_terminated_length": 695.0, "epoch": 0.5925185037007401, "frac_reward_zero_std": 0.0, "grad_norm": 2.836627464261482, "kl": 0.0200347900390625, "learning_rate": 4.837818816425736e-07, "loss": -0.0234, "num_tokens": 129652562.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0062086582183838, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04365186257866343, "rewards/wordcountpos_reward/raw_geo/std": 0.19465026795431162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1260.0, "completions/max_terminated_length": 1260.0, "completions/mean_length": 1050.0625, "completions/mean_terminated_length": 1050.0625, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.5927185437087418, "frac_reward_zero_std": 0.0, "grad_norm": 2.835627876153053, "kl": 0.0135345458984375, "learning_rate": 4.834710894208988e-07, "loss": -0.0029, "num_tokens": 129698539.0, "reward": 0.0, "reward_std": 0.9592098593711853, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03584771722735242, "rewards/wordcountpos_reward/raw_geo/std": 0.07248576354438162, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852975, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1068.5, "completions/mean_terminated_length": 1068.5, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.5929185837167433, "frac_reward_zero_std": 0.0, "grad_norm": 3.3559075253862183, "kl": 0.02044677734375, "learning_rate": 4.831603296390228e-07, "loss": 0.0054, "num_tokens": 129740987.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0583454370498657, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.050688161543957624, "rewards/wordcountpos_reward/raw_geo/std": 0.062393176064278785, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 961.5625, "completions/mean_terminated_length": 961.5625, "completions/min_length": 523.0, "completions/min_terminated_length": 523.0, "epoch": 0.593118623724745, "frac_reward_zero_std": 0.0, "grad_norm": 3.913525496812335, "kl": 0.023193359375, "learning_rate": 4.828496024484737e-07, "loss": 0.0495, "num_tokens": 129780148.0, "reward": -2.9802322387695312e-08, "reward_std": 0.992141604423523, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0004432610388490862, "rewards/wordcountpos_reward/raw_geo/std": 0.0628399692407165, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1016.5, "completions/mean_terminated_length": 1016.5, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.5933186637327466, "frac_reward_zero_std": 0.0, "grad_norm": 3.217137232718677, "kl": 0.01625823974609375, "learning_rate": 4.82538908000763e-07, "loss": 0.0037, "num_tokens": 129816028.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9180837273597717, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06123500805613899, "rewards/wordcountpos_reward/raw_geo/std": 0.1458768530454621, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12531441937663723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1120.625, "completions/mean_terminated_length": 1095.3333740234375, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.5935187037407481, "frac_reward_zero_std": 0.0, "grad_norm": 2.921760398383484, "kl": 0.0149383544921875, "learning_rate": 4.822282464473875e-07, "loss": 0.0191, "num_tokens": 129856398.0, "reward": 0.0, "reward_std": 0.4276248514652252, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05671251849124535, "rewards/wordcountpos_reward/raw_geo/std": 0.23059906514078726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.17971170328138425, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1265.5625, "completions/mean_terminated_length": 1249.933349609375, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.5937187437487498, "frac_reward_zero_std": 0.0, "grad_norm": 2.4974144052451797, "kl": 0.012725830078125, "learning_rate": 4.819176179398266e-07, "loss": -0.0073, "num_tokens": 129900951.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9235851764678955, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.40650067418151875, "rewards/wordcountpos_reward/raw_geo/std": 0.14395947456450378, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1223.9375, "completions/mean_terminated_length": 1184.5, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.5939187837567513, "frac_reward_zero_std": 0.0, "grad_norm": 3.1075935996862074, "kl": 0.018829345703125, "learning_rate": 4.816070226295446e-07, "loss": -0.0379, "num_tokens": 129953046.0, "reward": 0.0, "reward_std": 0.9943954944610596, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13304535844447554, "rewards/wordcountpos_reward/raw_geo/std": 0.1590228462116463, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0768596604689834, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1198.0625, "completions/mean_terminated_length": 1198.0625, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.594118823764753, "frac_reward_zero_std": 0.0, "grad_norm": 3.044085713312657, "kl": 0.0165252685546875, "learning_rate": 4.812964606679885e-07, "loss": 0.0142, "num_tokens": 129991663.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8950831890106201, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02831835169588172, "rewards/wordcountpos_reward/raw_geo/std": 0.06598378008230438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 1032.9375, "completions/mean_terminated_length": 1032.9375, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.5943188637727546, "frac_reward_zero_std": 0.0, "grad_norm": 3.8657417956695928, "kl": 0.023101806640625, "learning_rate": 4.809859322065905e-07, "loss": -0.0033, "num_tokens": 130039990.0, "reward": 0.0, "reward_std": 0.9467697143554688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2129481775289309, "rewards/wordcountpos_reward/raw_geo/std": 0.06829502461373002, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1567612007930345, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1277.125, "completions/mean_terminated_length": 1202.8333740234375, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.5945189037807561, "frac_reward_zero_std": 0.0, "grad_norm": 3.3114068839026123, "kl": 0.0175933837890625, "learning_rate": 4.806754373967652e-07, "loss": 0.0299, "num_tokens": 130089480.0, "reward": 3.725290298461914e-08, "reward_std": 0.8934400081634521, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0285090409100262, "rewards/wordcountpos_reward/raw_geo/std": 0.16292842233326302, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941137, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1285.5625, "completions/mean_terminated_length": 1236.0770263671875, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.5947189437887578, "frac_reward_zero_std": 0.0, "grad_norm": 3.1798968251845507, "kl": 0.01873779296875, "learning_rate": 4.803649763899115e-07, "loss": -0.002, "num_tokens": 130138113.0, "reward": 0.0, "reward_std": 0.8109619617462158, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0010769840263463362, "rewards/wordcountpos_reward/raw_geo/std": 0.12253442358598103, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1150.3125, "completions/mean_terminated_length": 1150.3125, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.5949189837967593, "frac_reward_zero_std": 0.0, "grad_norm": 2.971395770801547, "kl": 0.017608642578125, "learning_rate": 4.800545493374114e-07, "loss": -0.0458, "num_tokens": 130176502.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9346969127655029, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0358264422454289, "rewards/wordcountpos_reward/raw_geo/std": 0.06917955647231729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1319.6875, "completions/mean_terminated_length": 1139.375, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.595119023804761, "frac_reward_zero_std": 0.0, "grad_norm": 2.770501226651022, "kl": 0.0139617919921875, "learning_rate": 4.797441563906309e-07, "loss": -0.0256, "num_tokens": 130232945.0, "reward": 0.0, "reward_std": 0.9195587635040283, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11446833493531647, "rewards/wordcountpos_reward/raw_geo/std": 0.24304589645068714, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1002773930432755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1149.0625, "completions/mean_terminated_length": 1149.0625, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.5953190638127626, "frac_reward_zero_std": 0.0, "grad_norm": 2.657663021385397, "kl": 0.01329803466796875, "learning_rate": 4.794337977009185e-07, "loss": 0.01, "num_tokens": 130276946.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8801113367080688, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12243374117202994, "rewards/wordcountpos_reward/raw_geo/std": 0.08036997213360543, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1261.75, "completions/mean_terminated_length": 1153.45458984375, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.5955191038207641, "frac_reward_zero_std": 0.0, "grad_norm": 3.2299504103974432, "kl": 0.0155181884765625, "learning_rate": 4.791234734196069e-07, "loss": -0.0058, "num_tokens": 130323662.0, "reward": 0.0, "reward_std": 0.7938050031661987, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014420491490707323, "rewards/wordcountpos_reward/raw_geo/std": 0.14538287628051008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1161.8125, "completions/mean_terminated_length": 1161.8125, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.5957191438287658, "frac_reward_zero_std": 0.0, "grad_norm": 2.889491330362605, "kl": 0.012237548828125, "learning_rate": 4.788131836980115e-07, "loss": -0.0346, "num_tokens": 130372099.0, "reward": 0.0, "reward_std": 1.0025705099105835, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10517167184094178, "rewards/wordcountpos_reward/raw_geo/std": 0.05544784581603977, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 1183.25, "completions/mean_terminated_length": 1162.1334228515625, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.5959191838367673, "frac_reward_zero_std": 0.0, "grad_norm": 3.3413800287982385, "kl": 0.020660400390625, "learning_rate": 4.785029286874311e-07, "loss": -0.0079, "num_tokens": 130410999.0, "reward": -1.4901161193847656e-08, "reward_std": 0.980555534362793, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1371987602327323, "rewards/wordcountpos_reward/raw_geo/std": 0.10457520301141943, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1104.3125, "completions/mean_terminated_length": 1104.3125, "completions/min_length": 855.0, "completions/min_terminated_length": 855.0, "epoch": 0.596119223844769, "frac_reward_zero_std": 0.0, "grad_norm": 3.1824196600937475, "kl": 0.0159454345703125, "learning_rate": 4.781927085391471e-07, "loss": -0.0274, "num_tokens": 130452604.0, "reward": 0.0, "reward_std": 0.7686605453491211, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1877658521702975, "rewards/wordcountpos_reward/raw_geo/std": 0.19566804839474783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1016.6875, "completions/mean_terminated_length": 1016.6875, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.5963192638527706, "frac_reward_zero_std": 0.0, "grad_norm": 3.7581629467490822, "kl": 0.022247314453125, "learning_rate": 4.778825234044246e-07, "loss": -0.0288, "num_tokens": 130493335.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0363513231277466, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02264452971979754, "rewards/wordcountpos_reward/raw_geo/std": 0.09129443656497231, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1079.5, "completions/mean_terminated_length": 1079.5, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.5965193038607721, "frac_reward_zero_std": 0.0, "grad_norm": 2.701784492807445, "kl": 0.0120391845703125, "learning_rate": 4.775723734345111e-07, "loss": -0.0206, "num_tokens": 130531735.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6675320863723755, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03665592620888321, "rewards/wordcountpos_reward/raw_geo/std": 0.048428950751036086, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1140987226857449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1084.0, "completions/mean_length": 1001.1875, "completions/mean_terminated_length": 967.9334106445312, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.5967193438687738, "frac_reward_zero_std": 0.0, "grad_norm": 3.217998520310616, "kl": 0.0187225341796875, "learning_rate": 4.772622587806373e-07, "loss": 0.0447, "num_tokens": 130573050.0, "reward": 1.4901161193847656e-08, "reward_std": 0.99530029296875, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19723118087431793, "rewards/wordcountpos_reward/raw_geo/std": 0.08725765724721202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16815997674172586, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1211.6875, "completions/mean_terminated_length": 1145.1539306640625, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.5969193838767753, "frac_reward_zero_std": 0.0, "grad_norm": 3.4134685415923776, "kl": 0.019500732421875, "learning_rate": 4.769521795940163e-07, "loss": 0.0237, "num_tokens": 130618077.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0255215167999268, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04875051971216046, "rewards/wordcountpos_reward/raw_geo/std": 0.09233446732701618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1071.8125, "completions/mean_terminated_length": 1043.2667236328125, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.5971194238847769, "frac_reward_zero_std": 0.0, "grad_norm": 3.732170510191467, "kl": 0.020751953125, "learning_rate": 4.7664213602584433e-07, "loss": -0.0076, "num_tokens": 130672210.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9828521013259888, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07197872385278702, "rewards/wordcountpos_reward/raw_geo/std": 0.05193180932898244, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1017.5625, "completions/mean_terminated_length": 1017.5625, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.5973194638927786, "frac_reward_zero_std": 0.0, "grad_norm": 3.0646203106763457, "kl": 0.0120849609375, "learning_rate": 4.763321282272997e-07, "loss": 0.0004, "num_tokens": 130705547.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7874888181686401, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05280845117171283, "rewards/wordcountpos_reward/raw_geo/std": 0.0814856573482461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1191.8125, "completions/mean_terminated_length": 1147.7857666015625, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.5975195039007801, "frac_reward_zero_std": 0.0, "grad_norm": 3.198688720595037, "kl": 0.021148681640625, "learning_rate": 4.760221563495442e-07, "loss": 0.0036, "num_tokens": 130758784.0, "reward": 0.0, "reward_std": 0.3672648072242737, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11693730159744772, "rewards/wordcountpos_reward/raw_geo/std": 0.1942374552420396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1397.5, "completions/mean_terminated_length": 1336.0, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.5977195439087818, "frac_reward_zero_std": 0.0, "grad_norm": 4.087362005965721, "kl": 0.0260009765625, "learning_rate": 4.7571222054372106e-07, "loss": 0.0085, "num_tokens": 130821192.0, "reward": 0.0, "reward_std": 0.6618865728378296, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05931524253726329, "rewards/wordcountpos_reward/raw_geo/std": 0.06538342574312991, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 1126.5, "completions/mean_terminated_length": 1101.60009765625, "completions/min_length": 679.0, "completions/min_terminated_length": 679.0, "epoch": 0.5979195839167833, "frac_reward_zero_std": 0.0, "grad_norm": 3.5933458457623604, "kl": 0.0211181640625, "learning_rate": 4.754023209609566e-07, "loss": -0.003, "num_tokens": 130869360.0, "reward": 0.0, "reward_std": 0.8016161918640137, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01726701025466127, "rewards/wordcountpos_reward/raw_geo/std": 0.08165095644673058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 966.75, "completions/mean_terminated_length": 966.75, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.5981196239247849, "frac_reward_zero_std": 0.0, "grad_norm": 3.7555597560427976, "kl": 0.0191650390625, "learning_rate": 4.7509245775235893e-07, "loss": -0.0401, "num_tokens": 130912788.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9224298000335693, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0121634812149024, "rewards/wordcountpos_reward/raw_geo/std": 0.041219084149101234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1095.75, "completions/mean_terminated_length": 1095.75, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.5983196639327866, "frac_reward_zero_std": 0.0, "grad_norm": 2.3509490580581414, "kl": 0.01104736328125, "learning_rate": 4.747826310690194e-07, "loss": -0.0111, "num_tokens": 130958192.0, "reward": 0.0, "reward_std": 1.008884072303772, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.015179543446356976, "rewards/wordcountpos_reward/raw_geo/std": 0.2874515047529491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 2991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1022.0, "completions/max_terminated_length": 1022.0, "completions/mean_length": 876.75, "completions/mean_terminated_length": 876.75, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.5985197039407881, "frac_reward_zero_std": 0.0, "grad_norm": 4.070031873514627, "kl": 0.0382080078125, "learning_rate": 4.7447284106201014e-07, "loss": 0.0051, "num_tokens": 130997236.0, "reward": 0.0, "reward_std": 0.6871823668479919, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.115165552085907, "rewards/wordcountpos_reward/raw_geo/std": 0.07554839963784556, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382571, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1287.375, "completions/mean_terminated_length": 1257.0, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.5987197439487898, "frac_reward_zero_std": 0.0, "grad_norm": 3.0572039725581996, "kl": 0.0174560546875, "learning_rate": 4.741630878823868e-07, "loss": -0.0435, "num_tokens": 131051466.0, "reward": 0.0, "reward_std": 0.5675912499427795, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.022572718577590956, "rewards/wordcountpos_reward/raw_geo/std": 0.04969702841738175, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1106.125, "completions/mean_terminated_length": 1106.125, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.5989197839567914, "frac_reward_zero_std": 0.0, "grad_norm": 3.1538605594352727, "kl": 0.01593017578125, "learning_rate": 4.73853371681186e-07, "loss": -0.0746, "num_tokens": 131099660.0, "reward": -7.450580596923828e-09, "reward_std": 1.0580068826675415, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.013921680991526915, "rewards/wordcountpos_reward/raw_geo/std": 0.0404819122385736, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1091.5625, "completions/mean_terminated_length": 1091.5625, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.5991198239647929, "frac_reward_zero_std": 0.0, "grad_norm": 3.3041018515953, "kl": 0.02142333984375, "learning_rate": 4.7354369260942707e-07, "loss": 0.0443, "num_tokens": 131137877.0, "reward": -4.470348358154297e-08, "reward_std": 0.9011232256889343, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19858270388850457, "rewards/wordcountpos_reward/raw_geo/std": 0.22715851659407307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1303.9375, "completions/mean_terminated_length": 1275.9285888671875, "completions/min_length": 1045.0, "completions/min_terminated_length": 1045.0, "epoch": 0.5993198639727946, "frac_reward_zero_std": 0.0, "grad_norm": 2.8942729300011463, "kl": 0.019622802734375, "learning_rate": 4.7323405081811083e-07, "loss": 0.0015, "num_tokens": 131182684.0, "reward": 2.2351741790771484e-08, "reward_std": 0.9670311212539673, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11222304023877058, "rewards/wordcountpos_reward/raw_geo/std": 0.12514888372294075, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1097.375, "completions/mean_terminated_length": 1097.375, "completions/min_length": 700.0, "completions/min_terminated_length": 700.0, "epoch": 0.5995199039807961, "frac_reward_zero_std": 0.0, "grad_norm": 3.0227225189089957, "kl": 0.01332855224609375, "learning_rate": 4.7292444645821985e-07, "loss": -0.0441, "num_tokens": 131226562.0, "reward": 1.4901161193847656e-08, "reward_std": 1.059408187866211, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0890525197100807, "rewards/wordcountpos_reward/raw_geo/std": 0.25360789049958726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.16953094331342802, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 1050.75, "completions/mean_terminated_length": 1020.800048828125, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.5997199439887978, "frac_reward_zero_std": 0.0, "grad_norm": 3.149635809587768, "kl": 0.015716552734375, "learning_rate": 4.726148796807189e-07, "loss": 0.0393, "num_tokens": 131265430.0, "reward": 0.0, "reward_std": 0.9445211887359619, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07841904861236643, "rewards/wordcountpos_reward/raw_geo/std": 0.15348486492555669, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12988598989256067, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1208.375, "completions/mean_terminated_length": 1111.166748046875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.5999199839967994, "frac_reward_zero_std": 0.0, "grad_norm": 3.542889466307964, "kl": 0.02081298828125, "learning_rate": 4.723053506365539e-07, "loss": -0.0323, "num_tokens": 131308404.0, "reward": 0.0, "reward_std": 0.9447296857833862, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3534025005557959, "rewards/wordcountpos_reward/raw_geo/std": 0.25879253690377224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 2999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1207.0, "completions/max_terminated_length": 1207.0, "completions/mean_length": 934.1875, "completions/mean_terminated_length": 934.1875, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.6001200240048009, "frac_reward_zero_std": 0.0, "grad_norm": 3.189826888912903, "kl": 0.02215576171875, "learning_rate": 4.7199585947665276e-07, "loss": -0.0443, "num_tokens": 131337799.0, "reward": 2.9802322387695312e-08, "reward_std": 0.869807779788971, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18534674873762977, "rewards/wordcountpos_reward/raw_geo/std": 0.07530151329025572, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818892, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 962.0625, "completions/mean_terminated_length": 962.0625, "completions/min_length": 651.0, "completions/min_terminated_length": 651.0, "epoch": 0.6003200640128026, "frac_reward_zero_std": 0.0, "grad_norm": 3.6362228677987565, "kl": 0.021026611328125, "learning_rate": 4.716864063519248e-07, "loss": -0.014, "num_tokens": 131375240.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9516334533691406, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1594936798171518, "rewards/wordcountpos_reward/raw_geo/std": 0.09670657461120308, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1316.125, "completions/mean_terminated_length": 1132.25, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.6005201040208041, "frac_reward_zero_std": 0.0, "grad_norm": 3.0316421900449324, "kl": 0.01776123046875, "learning_rate": 4.713769914132607e-07, "loss": -0.0659, "num_tokens": 131429218.0, "reward": 1.862645149230957e-08, "reward_std": 1.0595431327819824, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026967646072806103, "rewards/wordcountpos_reward/raw_geo/std": 0.06550510514038048, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901858, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1104.5, "completions/mean_terminated_length": 1104.5, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.6007201440288058, "frac_reward_zero_std": 0.0, "grad_norm": 3.4167591806592017, "kl": 0.01788330078125, "learning_rate": 4.7106761481153255e-07, "loss": -0.0148, "num_tokens": 131468002.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9738637208938599, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02716202690059955, "rewards/wordcountpos_reward/raw_geo/std": 0.094549547643043, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1169.1875, "completions/mean_terminated_length": 1169.1875, "completions/min_length": 947.0, "completions/min_terminated_length": 947.0, "epoch": 0.6009201840368074, "frac_reward_zero_std": 0.0, "grad_norm": 3.0794805576336683, "kl": 0.0164031982421875, "learning_rate": 4.7075827669759395e-07, "loss": -0.022, "num_tokens": 131506621.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9789403676986694, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15761140488083733, "rewards/wordcountpos_reward/raw_geo/std": 0.06572276037455936, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1120.5, "completions/mean_terminated_length": 1095.2000732421875, "completions/min_length": 622.0, "completions/min_terminated_length": 622.0, "epoch": 0.6011202240448089, "frac_reward_zero_std": 0.0, "grad_norm": 3.1783623362955145, "kl": 0.01666259765625, "learning_rate": 4.7044897722227937e-07, "loss": 0.0041, "num_tokens": 131548317.0, "reward": 0.0, "reward_std": 0.6684358716011047, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10263210787716767, "rewards/wordcountpos_reward/raw_geo/std": 0.11900613728604376, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1172.125, "completions/mean_terminated_length": 1096.4615478515625, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.6013202640528106, "frac_reward_zero_std": 0.0, "grad_norm": 3.001942805287992, "kl": 0.0137481689453125, "learning_rate": 4.701397165364048e-07, "loss": 0.0845, "num_tokens": 131591447.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8413606882095337, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05463803638935225, "rewards/wordcountpos_reward/raw_geo/std": 0.05785121234452542, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1056.1875, "completions/mean_terminated_length": 1056.1875, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.6015203040608121, "frac_reward_zero_std": 0.0, "grad_norm": 3.3204583376271333, "kl": 0.014373779296875, "learning_rate": 4.69830494790767e-07, "loss": 0.0163, "num_tokens": 131629666.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8420791625976562, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11606138031629594, "rewards/wordcountpos_reward/raw_geo/std": 0.226698458327229, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1153.0625, "completions/mean_terminated_length": 1129.933349609375, "completions/min_length": 1006.0, "completions/min_terminated_length": 1006.0, "epoch": 0.6017203440688138, "frac_reward_zero_std": 0.0, "grad_norm": 3.3394775765712343, "kl": 0.017059326171875, "learning_rate": 4.695213121361441e-07, "loss": -0.0318, "num_tokens": 131679899.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0361604690551758, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009593734945409759, "rewards/wordcountpos_reward/raw_geo/std": 0.0850947737104744, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1137.0625, "completions/mean_terminated_length": 1112.86669921875, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.6019203840768154, "frac_reward_zero_std": 0.0, "grad_norm": 2.965556593933628, "kl": 0.01495361328125, "learning_rate": 4.692121687232947e-07, "loss": -0.013, "num_tokens": 131727716.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7457621097564697, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06947892116031806, "rewards/wordcountpos_reward/raw_geo/std": 0.11687616549578639, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1021.8125, "completions/mean_terminated_length": 989.9334106445312, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.6021204240848169, "frac_reward_zero_std": 0.0, "grad_norm": 2.889908017202391, "kl": 0.0165252685546875, "learning_rate": 4.689030647029589e-07, "loss": -0.0034, "num_tokens": 131775753.0, "reward": 0.0, "reward_std": 0.862662672996521, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10857188860914424, "rewards/wordcountpos_reward/raw_geo/std": 0.1293788201555624, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185827, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3010 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1162.4375, "completions/mean_terminated_length": 1162.4375, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.6023204640928186, "frac_reward_zero_std": 0.0, "grad_norm": 3.0395123227405754, "kl": 0.014892578125, "learning_rate": 4.6859400022585683e-07, "loss": -0.0335, "num_tokens": 131817888.0, "reward": 0.0, "reward_std": 1.0424678325653076, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10250191833533834, "rewards/wordcountpos_reward/raw_geo/std": 0.23468552702967868, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 1099.4375, "completions/mean_terminated_length": 1072.7333984375, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.6025205041008201, "frac_reward_zero_std": 0.0, "grad_norm": 3.4948202630768863, "kl": 0.021026611328125, "learning_rate": 4.6828497544268997e-07, "loss": 0.0147, "num_tokens": 131854239.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0268830060958862, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17122050234015443, "rewards/wordcountpos_reward/raw_geo/std": 0.1527498051025204, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 957.0, "completions/max_terminated_length": 957.0, "completions/mean_length": 821.625, "completions/mean_terminated_length": 821.625, "completions/min_length": 673.0, "completions/min_terminated_length": 673.0, "epoch": 0.6027205441088218, "frac_reward_zero_std": 0.0, "grad_norm": 2.1138863236564243, "kl": 0.0127410888671875, "learning_rate": 4.679759905041399e-07, "loss": -0.0196, "num_tokens": 131888193.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5918282270431519, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10730511257842651, "rewards/wordcountpos_reward/raw_geo/std": 0.10732402029661767, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1343.75, "completions/mean_terminated_length": 1307.6923828125, "completions/min_length": 1118.0, "completions/min_terminated_length": 1118.0, "epoch": 0.6029205841168234, "frac_reward_zero_std": 0.0, "grad_norm": 3.119684054995698, "kl": 0.0206298828125, "learning_rate": 4.676670455608698e-07, "loss": -0.012, "num_tokens": 131938837.0, "reward": 0.0, "reward_std": 0.7428402900695801, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.043839434441177126, "rewards/wordcountpos_reward/raw_geo/std": 0.31999062293835284, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.11080513425729775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1169.3125, "completions/mean_terminated_length": 1122.071533203125, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.6031206241248249, "frac_reward_zero_std": 0.0, "grad_norm": 2.997431644258113, "kl": 0.015350341796875, "learning_rate": 4.6735814076352164e-07, "loss": -0.0305, "num_tokens": 131981106.0, "reward": -7.450580596923828e-09, "reward_std": 1.0496398210525513, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07917901045609663, "rewards/wordcountpos_reward/raw_geo/std": 0.2006828512220999, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12224747213928167, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1028.3125, "completions/mean_terminated_length": 1028.3125, "completions/min_length": 612.0, "completions/min_terminated_length": 612.0, "epoch": 0.6033206641328266, "frac_reward_zero_std": 0.0, "grad_norm": 3.6813457854635407, "kl": 0.02581787109375, "learning_rate": 4.670492762627197e-07, "loss": 0.0029, "num_tokens": 132026415.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4672084152698517, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13557964528831823, "rewards/wordcountpos_reward/raw_geo/std": 0.2080087435706035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1309.8125, "completions/mean_terminated_length": 1265.923095703125, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.6035207041408281, "frac_reward_zero_std": 0.0, "grad_norm": 2.408294026235116, "kl": 0.0145111083984375, "learning_rate": 4.667404522090671e-07, "loss": -0.0536, "num_tokens": 132072988.0, "reward": 0.0, "reward_std": 0.778279721736908, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1503076561800854, "rewards/wordcountpos_reward/raw_geo/std": 0.15119584529850297, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1041.25, "completions/mean_terminated_length": 1041.25, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.6037207441488298, "frac_reward_zero_std": 0.0, "grad_norm": 2.803463151450791, "kl": 0.0129852294921875, "learning_rate": 4.6643166875314834e-07, "loss": 0.001, "num_tokens": 132110528.0, "reward": -2.60770320892334e-08, "reward_std": 1.067284345626831, "rewards/wordcountpos_reward/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006994295278201501, "rewards/wordcountpos_reward/raw_geo/std": 0.044682087489666934, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1154.0, "completions/mean_terminated_length": 1130.933349609375, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.6039207841568314, "frac_reward_zero_std": 0.0, "grad_norm": 3.17827530176995, "kl": 0.019866943359375, "learning_rate": 4.6612292604552704e-07, "loss": 0.0265, "num_tokens": 132155344.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9355877637863159, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.20650071280802937, "rewards/wordcountpos_reward/raw_geo/std": 0.1613135326411681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13109227736669002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1286.375, "completions/mean_terminated_length": 1158.2000732421875, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.6041208241648329, "frac_reward_zero_std": 0.0, "grad_norm": 2.704231291168677, "kl": 0.0159149169921875, "learning_rate": 4.658142242367481e-07, "loss": -0.1093, "num_tokens": 132202438.0, "reward": 0.0, "reward_std": 0.5959218740463257, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.034429878812258355, "rewards/wordcountpos_reward/raw_geo/std": 0.15983774589246222, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1161.0, "completions/max_terminated_length": 1161.0, "completions/mean_length": 880.875, "completions/mean_terminated_length": 880.875, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.6043208641728346, "frac_reward_zero_std": 0.0, "grad_norm": 3.1069559827320834, "kl": 0.0150909423828125, "learning_rate": 4.655055634773355e-07, "loss": -0.0207, "num_tokens": 132236660.0, "reward": 2.60770320892334e-08, "reward_std": 1.0660862922668457, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08278667876480017, "rewards/wordcountpos_reward/raw_geo/std": 0.12726277617479545, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1056.5, "completions/mean_terminated_length": 1056.5, "completions/min_length": 672.0, "completions/min_terminated_length": 672.0, "epoch": 0.6045209041808361, "frac_reward_zero_std": 0.0, "grad_norm": 3.0208187452438207, "kl": 0.014892578125, "learning_rate": 4.6519694391779406e-07, "loss": -0.0173, "num_tokens": 132267364.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6936368346214294, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11398180903169082, "rewards/wordcountpos_reward/raw_geo/std": 0.1640021900463067, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1434.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1201.375, "completions/mean_terminated_length": 1201.375, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.6047209441888378, "frac_reward_zero_std": 0.0, "grad_norm": 3.3158566128660003, "kl": 0.019500732421875, "learning_rate": 4.648883657086077e-07, "loss": -0.0463, "num_tokens": 132314042.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9655158519744873, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03510258106174737, "rewards/wordcountpos_reward/raw_geo/std": 0.08319902287922618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1440.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1132.6875, "completions/mean_terminated_length": 1132.6875, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.6049209841968394, "frac_reward_zero_std": 0.0, "grad_norm": 3.241323673392083, "kl": 0.0164947509765625, "learning_rate": 4.6457982900024083e-07, "loss": -0.0133, "num_tokens": 132365565.0, "reward": 0.0, "reward_std": 1.0005693435668945, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11287847487991617, "rewards/wordcountpos_reward/raw_geo/std": 0.1106367438170582, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1086.8125, "completions/mean_terminated_length": 1086.8125, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.6051210242048409, "frac_reward_zero_std": 0.0, "grad_norm": 2.9578823537815553, "kl": 0.01483154296875, "learning_rate": 4.642713339431371e-07, "loss": -0.034, "num_tokens": 132398970.0, "reward": -2.2351741790771484e-08, "reward_std": 1.015271544456482, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.014164475889386918, "rewards/wordcountpos_reward/raw_geo/std": 0.06844850229099571, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1267.9375, "completions/mean_terminated_length": 1252.4666748046875, "completions/min_length": 1116.0, "completions/min_terminated_length": 1116.0, "epoch": 0.6053210642128426, "frac_reward_zero_std": 0.0, "grad_norm": 3.0558664002438847, "kl": 0.01739501953125, "learning_rate": 4.639628806877204e-07, "loss": -0.0168, "num_tokens": 132443465.0, "reward": 0.0, "reward_std": 0.8241527080535889, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.24225642877677894, "rewards/wordcountpos_reward/raw_geo/std": 0.23529969623421618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 985.6875, "completions/mean_terminated_length": 985.6875, "completions/min_length": 660.0, "completions/min_terminated_length": 660.0, "epoch": 0.6055211042208442, "frac_reward_zero_std": 0.0, "grad_norm": 3.4282943689280794, "kl": 0.01434326171875, "learning_rate": 4.6365446938439357e-07, "loss": -0.0149, "num_tokens": 132470948.0, "reward": 0.0, "reward_std": 0.9814538955688477, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0440676614152069, "rewards/wordcountpos_reward/raw_geo/std": 0.051943973103556224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3027 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1125.125, "completions/mean_terminated_length": 1125.125, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.6057211442288458, "frac_reward_zero_std": 0.0, "grad_norm": 3.5332672639873155, "kl": 0.02008056640625, "learning_rate": 4.633461001835396e-07, "loss": 0.0253, "num_tokens": 132518766.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0451531410217285, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05798161380658767, "rewards/wordcountpos_reward/raw_geo/std": 0.19632465624040676, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 1097.1875, "completions/mean_terminated_length": 1097.1875, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.6059211842368474, "frac_reward_zero_std": 0.0, "grad_norm": 3.5168313543866803, "kl": 0.024078369140625, "learning_rate": 4.6303777323552076e-07, "loss": 0.0158, "num_tokens": 132560977.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5629277229309082, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11039862949679109, "rewards/wordcountpos_reward/raw_geo/std": 0.14664922685695628, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1365582225578092, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1075.8125, "completions/mean_terminated_length": 1075.8125, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.6061212242448489, "frac_reward_zero_std": 0.0, "grad_norm": 2.500843738551943, "kl": 0.014251708984375, "learning_rate": 4.627294886906785e-07, "loss": -0.053, "num_tokens": 132595806.0, "reward": 0.0, "reward_std": 0.6349881887435913, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14995172917224614, "rewards/wordcountpos_reward/raw_geo/std": 0.17708010928859777, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3030 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1407.75, "completions/mean_terminated_length": 1315.5, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.6063212642528506, "frac_reward_zero_std": 0.0, "grad_norm": 2.5923994798839454, "kl": 0.0175018310546875, "learning_rate": 4.624212466993338e-07, "loss": -0.035, "num_tokens": 132646762.0, "reward": 7.450580596923828e-09, "reward_std": 1.0528333187103271, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1448051836118537, "rewards/wordcountpos_reward/raw_geo/std": 0.0978205179926813, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1060.0, "completions/mean_terminated_length": 1030.666748046875, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.6065213042608522, "frac_reward_zero_std": 0.0, "grad_norm": 3.064050952484277, "kl": 0.016693115234375, "learning_rate": 4.621130474117869e-07, "loss": 0.0056, "num_tokens": 132679410.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6213269233703613, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1320392335214174, "rewards/wordcountpos_reward/raw_geo/std": 0.10456729288328466, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1200.125, "completions/mean_terminated_length": 1180.1334228515625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.6067213442688538, "frac_reward_zero_std": 0.0, "grad_norm": 3.044200592084815, "kl": 0.0164031982421875, "learning_rate": 4.618048909783173e-07, "loss": -0.0111, "num_tokens": 132731620.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8682878017425537, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.025628929022234567, "rewards/wordcountpos_reward/raw_geo/std": 0.04066349178088577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1121.0625, "completions/mean_terminated_length": 1121.0625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.6069213842768554, "frac_reward_zero_std": 0.0, "grad_norm": 2.7616487767736215, "kl": 0.0133819580078125, "learning_rate": 4.6149677754918324e-07, "loss": 0.0066, "num_tokens": 132768421.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0572259426116943, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04516664023505598, "rewards/wordcountpos_reward/raw_geo/std": 0.04586499996957675, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1167.0, "completions/mean_terminated_length": 1144.800048828125, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.6071214242848569, "frac_reward_zero_std": 0.0, "grad_norm": 3.3144766040836338, "kl": 0.0157928466796875, "learning_rate": 4.611887072746222e-07, "loss": 0.0227, "num_tokens": 132814821.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0029528141021729, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.050647551704791155, "rewards/wordcountpos_reward/raw_geo/std": 0.06796846631040172, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1465.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1109.0625, "completions/mean_terminated_length": 1109.0625, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.6073214642928586, "frac_reward_zero_std": 0.0, "grad_norm": 2.9584408173147314, "kl": 0.015960693359375, "learning_rate": 4.6088068030485114e-07, "loss": 0.0592, "num_tokens": 132848654.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5745964050292969, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05420853337567779, "rewards/wordcountpos_reward/raw_geo/std": 0.02315598981875554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1211.875, "completions/mean_terminated_length": 1192.666748046875, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.6075215043008602, "frac_reward_zero_std": 0.0, "grad_norm": 3.398089647123142, "kl": 0.02008056640625, "learning_rate": 4.605726967900646e-07, "loss": 0.0518, "num_tokens": 132892388.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8335475921630859, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02991664713663923, "rewards/wordcountpos_reward/raw_geo/std": 0.06057509156900562, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1177.4375, "completions/mean_terminated_length": 1131.357177734375, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.6077215443088618, "frac_reward_zero_std": 0.0, "grad_norm": 3.461664062091483, "kl": 0.021240234375, "learning_rate": 4.6026475688043766e-07, "loss": 0.0004, "num_tokens": 132938035.0, "reward": 0.0, "reward_std": 0.7553505897521973, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.019400565751925537, "rewards/wordcountpos_reward/raw_geo/std": 0.174139197443015, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1207.25, "completions/mean_terminated_length": 1109.666748046875, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.6079215843168634, "frac_reward_zero_std": 0.0, "grad_norm": 3.0829700431911617, "kl": 0.02178955078125, "learning_rate": 4.5995686072612237e-07, "loss": -0.0211, "num_tokens": 132990519.0, "reward": 0.0, "reward_std": 0.9189120531082153, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.043996713051478745, "rewards/wordcountpos_reward/raw_geo/std": 0.1428505314725974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1174.1875, "completions/mean_terminated_length": 1152.4666748046875, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.6081216243248649, "frac_reward_zero_std": 0.0, "grad_norm": 2.981330311162852, "kl": 0.01507568359375, "learning_rate": 4.59649008477251e-07, "loss": -0.0176, "num_tokens": 133029970.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7024485468864441, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.034048027219948006, "rewards/wordcountpos_reward/raw_geo/std": 0.03794806454416943, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1171.125, "completions/mean_terminated_length": 1171.125, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.6083216643328666, "frac_reward_zero_std": 0.0, "grad_norm": 3.440072968884491, "kl": 0.024627685546875, "learning_rate": 4.5934120028393296e-07, "loss": -0.0202, "num_tokens": 133068068.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9681779146194458, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1365944600135508, "rewards/wordcountpos_reward/raw_geo/std": 0.2006466752315111, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1306.25, "completions/mean_terminated_length": 1190.0, "completions/min_length": 974.0, "completions/min_terminated_length": 974.0, "epoch": 0.6085217043408682, "frac_reward_zero_std": 0.0, "grad_norm": 2.9344978771393735, "kl": 0.0150146484375, "learning_rate": 4.590334362962576e-07, "loss": -0.0191, "num_tokens": 133116576.0, "reward": 0.0, "reward_std": 0.9054113626480103, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08082219138677543, "rewards/wordcountpos_reward/raw_geo/std": 0.060173336340424544, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1105.6875, "completions/mean_terminated_length": 869.1000366210938, "completions/min_length": 717.0, "completions/min_terminated_length": 717.0, "epoch": 0.6087217443488697, "frac_reward_zero_std": 0.0, "grad_norm": 2.563690017299328, "kl": 0.01308441162109375, "learning_rate": 4.5872571666429173e-07, "loss": 0.0383, "num_tokens": 133163571.0, "reward": 0.0, "reward_std": 0.7152486443519592, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1537447873305372, "rewards/wordcountpos_reward/raw_geo/std": 0.0943520496887652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1321.5625, "completions/mean_terminated_length": 1296.071533203125, "completions/min_length": 1099.0, "completions/min_terminated_length": 1099.0, "epoch": 0.6089217843568714, "frac_reward_zero_std": 0.0, "grad_norm": 3.2767615464156488, "kl": 0.021270751953125, "learning_rate": 4.5841804153808084e-07, "loss": 0.0114, "num_tokens": 133215564.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9330902695655823, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04143690891651257, "rewards/wordcountpos_reward/raw_geo/std": 0.1834830070232908, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1102.875, "completions/mean_terminated_length": 1102.875, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.6091218243648729, "frac_reward_zero_std": 0.0, "grad_norm": 3.4084995696646936, "kl": 0.019683837890625, "learning_rate": 4.581104110676487e-07, "loss": -0.0049, "num_tokens": 133255738.0, "reward": 0.0, "reward_std": 0.8985530138015747, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14230589719292389, "rewards/wordcountpos_reward/raw_geo/std": 0.1645747475546448, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1142.0625, "completions/mean_terminated_length": 1090.9285888671875, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.6093218643728746, "frac_reward_zero_std": 0.0, "grad_norm": 3.208323874670089, "kl": 0.017974853515625, "learning_rate": 4.5780282540299764e-07, "loss": 0.0463, "num_tokens": 133288987.0, "reward": -1.4901161193847656e-08, "reward_std": 1.056592583656311, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05928352248557726, "rewards/wordcountpos_reward/raw_geo/std": 0.11351697048118697, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1313.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1013.375, "completions/mean_terminated_length": 1013.375, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.6095219043808762, "frac_reward_zero_std": 0.0, "grad_norm": 2.798677456184356, "kl": 0.0142974853515625, "learning_rate": 4.5749528469410735e-07, "loss": 0.0095, "num_tokens": 133330465.0, "reward": 0.0, "reward_std": 0.6641088128089905, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21038243534776105, "rewards/wordcountpos_reward/raw_geo/std": 0.13015346691532426, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1159.0, "completions/mean_terminated_length": 1110.2857666015625, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.6097219443888777, "frac_reward_zero_std": 0.0, "grad_norm": 2.795057115256919, "kl": 0.0149993896484375, "learning_rate": 4.5718778909093635e-07, "loss": 0.0091, "num_tokens": 133365489.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9390414953231812, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.044630107396930255, "rewards/wordcountpos_reward/raw_geo/std": 0.05149869467848775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1083.5625, "completions/mean_terminated_length": 1055.800048828125, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.6099219843968794, "frac_reward_zero_std": 0.0, "grad_norm": 3.1287059625722393, "kl": 0.017669677734375, "learning_rate": 4.568803387434209e-07, "loss": 0.0266, "num_tokens": 133408594.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6530812978744507, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00714416106546222, "rewards/wordcountpos_reward/raw_geo/std": 0.11503689379185039, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1245.125, "completions/mean_terminated_length": 1245.125, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.610122024404881, "frac_reward_zero_std": 0.0, "grad_norm": 3.158848542630191, "kl": 0.019561767578125, "learning_rate": 4.565729338014751e-07, "loss": -0.0409, "num_tokens": 133455820.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5240070819854736, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04371003682041324, "rewards/wordcountpos_reward/raw_geo/std": 0.03885651923869752, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3050 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1072.0, "completions/mean_terminated_length": 1072.0, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.6103220644128826, "frac_reward_zero_std": 0.0, "grad_norm": 2.416205822118789, "kl": 0.0107574462890625, "learning_rate": 4.56265574414991e-07, "loss": -0.0418, "num_tokens": 133506708.0, "reward": 7.450580596923828e-09, "reward_std": 1.0615763664245605, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.055286810236748, "rewards/wordcountpos_reward/raw_geo/std": 0.12649383956083224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1166.125, "completions/mean_terminated_length": 1166.125, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.6105221044208842, "frac_reward_zero_std": 0.0, "grad_norm": 3.0362953343205534, "kl": 0.016021728515625, "learning_rate": 4.559582607338385e-07, "loss": 0.004, "num_tokens": 133551214.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0563901662826538, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2222949137075252, "rewards/wordcountpos_reward/raw_geo/std": 0.09765774350098881, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1241.9375, "completions/mean_terminated_length": 1241.9375, "completions/min_length": 1109.0, "completions/min_terminated_length": 1109.0, "epoch": 0.6107221444288857, "frac_reward_zero_std": 0.0, "grad_norm": 1.8484439564332535, "kl": 0.00885009765625, "learning_rate": 4.5565099290786503e-07, "loss": -0.0192, "num_tokens": 133594013.0, "reward": 2.0489096641540527e-08, "reward_std": 0.9127282500267029, "rewards/wordcountpos_reward/mean": 2.0489096641540527e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0332628412074196, "rewards/wordcountpos_reward/raw_geo/std": 0.2620007673236282, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1166.4375, "completions/mean_terminated_length": 1144.2000732421875, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.6109221844368874, "frac_reward_zero_std": 0.0, "grad_norm": 3.223537475363271, "kl": 0.01519775390625, "learning_rate": 4.553437710868958e-07, "loss": 0.0081, "num_tokens": 133631916.0, "reward": 0.0, "reward_std": 0.9208865165710449, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07482549713199244, "rewards/wordcountpos_reward/raw_geo/std": 0.18333193424448474, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.054433105395181744, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1276.875, "completions/mean_terminated_length": 1276.875, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.611122224444889, "frac_reward_zero_std": 0.0, "grad_norm": 2.7362105768168687, "kl": 0.01311492919921875, "learning_rate": 4.550365954207338e-07, "loss": -0.0116, "num_tokens": 133677306.0, "reward": -7.450580596923828e-09, "reward_std": 1.0138044357299805, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.38657340302221227, "rewards/wordcountpos_reward/raw_geo/std": 0.21073611115974075, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1095.1875, "completions/mean_terminated_length": 1068.2000732421875, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.6113222644528906, "frac_reward_zero_std": 0.0, "grad_norm": 3.226833364151032, "kl": 0.0159149169921875, "learning_rate": 4.5472946605915886e-07, "loss": -0.0497, "num_tokens": 133711277.0, "reward": -5.960464477539063e-08, "reward_std": 0.9074456095695496, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06217810907781832, "rewards/wordcountpos_reward/raw_geo/std": 0.09592383207610719, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1279.4375, "completions/mean_terminated_length": 1205.916748046875, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.6115223044608922, "frac_reward_zero_std": 0.0, "grad_norm": 2.6343072299802675, "kl": 0.0143890380859375, "learning_rate": 4.5442238315192905e-07, "loss": -0.0262, "num_tokens": 133755572.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0054301023483276, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.049414344943061725, "rewards/wordcountpos_reward/raw_geo/std": 0.10835879031391482, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1307.375, "completions/mean_terminated_length": 1243.166748046875, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.6117223444688937, "frac_reward_zero_std": 0.0, "grad_norm": 3.136506042528088, "kl": 0.016448974609375, "learning_rate": 4.5411534684877916e-07, "loss": -0.0188, "num_tokens": 133802530.0, "reward": 0.0, "reward_std": 0.7219713926315308, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.016462539885438367, "rewards/wordcountpos_reward/raw_geo/std": 0.13658574188751374, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1216.125, "completions/mean_terminated_length": 1121.5, "completions/min_length": 463.0, "completions/min_terminated_length": 463.0, "epoch": 0.6119223844768954, "frac_reward_zero_std": 0.0, "grad_norm": 3.3442354324440924, "kl": 0.01812744140625, "learning_rate": 4.538083572994216e-07, "loss": -0.0018, "num_tokens": 133850804.0, "reward": 0.0, "reward_std": 0.8375164270401001, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10363341676560012, "rewards/wordcountpos_reward/raw_geo/std": 0.08101518882285993, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1185.75, "completions/mean_terminated_length": 1185.75, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.612122424484897, "frac_reward_zero_std": 0.0, "grad_norm": 3.331863030968593, "kl": 0.01837158203125, "learning_rate": 4.5350141465354577e-07, "loss": 0.03, "num_tokens": 133897056.0, "reward": 0.0, "reward_std": 1.0467517375946045, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10289466200525366, "rewards/wordcountpos_reward/raw_geo/std": 0.19252539686733025, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1017.625, "completions/mean_terminated_length": 1017.625, "completions/min_length": 700.0, "completions/min_terminated_length": 700.0, "epoch": 0.6123224644928986, "frac_reward_zero_std": 0.0, "grad_norm": 2.995514248282947, "kl": 0.0178070068359375, "learning_rate": 4.531945190608184e-07, "loss": 0.0334, "num_tokens": 133929786.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7950400114059448, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.018095858756834324, "rewards/wordcountpos_reward/raw_geo/std": 0.05349419867349971, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1241.5, "completions/mean_terminated_length": 1224.2667236328125, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.6125225045009002, "frac_reward_zero_std": 0.0, "grad_norm": 2.8581349280966655, "kl": 0.0153350830078125, "learning_rate": 4.5288767067088296e-07, "loss": 0.0138, "num_tokens": 133963962.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7173128128051758, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006567618958391732, "rewards/wordcountpos_reward/raw_geo/std": 0.08668682234034411, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 998.9375, "completions/mean_terminated_length": 998.9375, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.6127225445089017, "frac_reward_zero_std": 0.0, "grad_norm": 3.4252389632871947, "kl": 0.020721435546875, "learning_rate": 4.525808696333605e-07, "loss": -0.009, "num_tokens": 133997601.0, "reward": -4.470348358154297e-08, "reward_std": 1.067777395248413, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022218872506427574, "rewards/wordcountpos_reward/raw_geo/std": 0.05823044558882548, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1140.875, "completions/mean_terminated_length": 1116.933349609375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.6129225845169034, "frac_reward_zero_std": 0.0, "grad_norm": 3.2673521487344086, "kl": 0.022674560546875, "learning_rate": 4.5227411609784815e-07, "loss": -0.0219, "num_tokens": 134038359.0, "reward": 2.9802322387695312e-08, "reward_std": 0.930027425289154, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10605073132682612, "rewards/wordcountpos_reward/raw_geo/std": 0.08447444603776384, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1116.125, "completions/mean_terminated_length": 1090.533447265625, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.613122624524905, "frac_reward_zero_std": 0.0, "grad_norm": 3.6007617486975247, "kl": 0.01531982421875, "learning_rate": 4.5196741021392084e-07, "loss": -0.0199, "num_tokens": 134079185.0, "reward": 0.0, "reward_std": 0.7109473347663879, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0018762087318616707, "rewards/wordcountpos_reward/raw_geo/std": 0.05356846362491359, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1418.3125, "completions/mean_terminated_length": 1369.300048828125, "completions/min_length": 1137.0, "completions/min_terminated_length": 1137.0, "epoch": 0.6133226645329066, "frac_reward_zero_std": 0.0, "grad_norm": 2.4519403156356376, "kl": 0.0163116455078125, "learning_rate": 4.516607521311291e-07, "loss": -0.0111, "num_tokens": 134139510.0, "reward": 0.0, "reward_std": 0.827957272529602, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.047109545687136954, "rewards/wordcountpos_reward/raw_geo/std": 0.09975092571189426, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 1294.875, "completions/mean_terminated_length": 1089.75, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.6135227045409082, "frac_reward_zero_std": 0.0, "grad_norm": 3.046568961165715, "kl": 0.0178070068359375, "learning_rate": 4.513541419990016e-07, "loss": -0.0263, "num_tokens": 134187748.0, "reward": 0.0, "reward_std": 0.7411305904388428, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09786327450385002, "rewards/wordcountpos_reward/raw_geo/std": 0.2840432865656331, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1267.0, "completions/mean_length": 1105.6875, "completions/mean_terminated_length": 1079.4000244140625, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.6137227445489097, "frac_reward_zero_std": 0.0, "grad_norm": 3.258370244039903, "kl": 0.022216796875, "learning_rate": 4.51047579967042e-07, "loss": -0.0251, "num_tokens": 134238223.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0261038541793823, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.29599554926305516, "rewards/wordcountpos_reward/raw_geo/std": 0.39135490350018165, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1109.375, "completions/mean_terminated_length": 1083.3333740234375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.6139227845569114, "frac_reward_zero_std": 0.0, "grad_norm": 3.1438859559798975, "kl": 0.02215576171875, "learning_rate": 4.5074106618473193e-07, "loss": -0.0447, "num_tokens": 134284485.0, "reward": 0.0, "reward_std": 1.0262510776519775, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.024160899150408737, "rewards/wordcountpos_reward/raw_geo/std": 0.07279686909137771, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852978, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1139.8125, "completions/mean_terminated_length": 1139.8125, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.614122824564913, "frac_reward_zero_std": 0.0, "grad_norm": 2.8635832865040274, "kl": 0.013702392578125, "learning_rate": 4.5043460080152875e-07, "loss": -0.0148, "num_tokens": 134328250.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8682661652565002, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20471456517425152, "rewards/wordcountpos_reward/raw_geo/std": 0.11517886359628017, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3070 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1127.5, "completions/mean_terminated_length": 1127.5, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.6143228645729146, "frac_reward_zero_std": 0.0, "grad_norm": 3.0209576695873137, "kl": 0.0172119140625, "learning_rate": 4.5012818396686624e-07, "loss": -0.0001, "num_tokens": 134368370.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7447686791419983, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012390008750051704, "rewards/wordcountpos_reward/raw_geo/std": 0.13789448078396943, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1208.0625, "completions/mean_terminated_length": 1110.75, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.6145229045809162, "frac_reward_zero_std": 0.0, "grad_norm": 2.8430346569088343, "kl": 0.012969970703125, "learning_rate": 4.4982181583015476e-07, "loss": 0.041, "num_tokens": 134412083.0, "reward": 0.0, "reward_std": 0.703311026096344, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10965958179540283, "rewards/wordcountpos_reward/raw_geo/std": 0.051183710853028794, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 940.8125, "completions/mean_terminated_length": 940.8125, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.6147229445889177, "frac_reward_zero_std": 0.0, "grad_norm": 3.3037288553947626, "kl": 0.0177459716796875, "learning_rate": 4.495154965407809e-07, "loss": -0.025, "num_tokens": 134452392.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9895670413970947, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08027033241568741, "rewards/wordcountpos_reward/raw_geo/std": 0.11949296765656513, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1198.1875, "completions/mean_terminated_length": 1178.0667724609375, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.6149229845969194, "frac_reward_zero_std": 0.0, "grad_norm": 2.8553936800216495, "kl": 0.015045166015625, "learning_rate": 4.492092262481071e-07, "loss": 0.0315, "num_tokens": 134487403.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5515080690383911, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10060323033181184, "rewards/wordcountpos_reward/raw_geo/std": 0.10441946086553602, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027816, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1040.25, "completions/mean_terminated_length": 1040.25, "completions/min_length": 738.0, "completions/min_terminated_length": 738.0, "epoch": 0.615123024604921, "frac_reward_zero_std": 0.0, "grad_norm": 2.8226971711052853, "kl": 0.01602935791015625, "learning_rate": 4.4890300510147227e-07, "loss": 0.0055, "num_tokens": 134527071.0, "reward": 2.9802322387695312e-08, "reward_std": 0.42266973853111267, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19622745201985023, "rewards/wordcountpos_reward/raw_geo/std": 0.2361581117639133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1288.875, "completions/mean_terminated_length": 1274.800048828125, "completions/min_length": 1089.0, "completions/min_terminated_length": 1089.0, "epoch": 0.6153230646129226, "frac_reward_zero_std": 0.0, "grad_norm": 3.138719397421388, "kl": 0.01947021484375, "learning_rate": 4.485968332501913e-07, "loss": 0.0071, "num_tokens": 134572973.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9968717098236084, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01081790987122003, "rewards/wordcountpos_reward/raw_geo/std": 0.05776575586607224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1349.375, "completions/mean_terminated_length": 1339.3333740234375, "completions/min_length": 1055.0, "completions/min_terminated_length": 1055.0, "epoch": 0.6155231046209242, "frac_reward_zero_std": 0.0, "grad_norm": 3.2882235144785743, "kl": 0.0213623046875, "learning_rate": 4.4829071084355485e-07, "loss": -0.0031, "num_tokens": 134612019.0, "reward": 4.470348358154297e-08, "reward_std": 1.0661132335662842, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.048939399076107594, "rewards/wordcountpos_reward/raw_geo/std": 0.07341294265415055, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3077 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1135.0, "completions/max_terminated_length": 1135.0, "completions/mean_length": 895.375, "completions/mean_terminated_length": 895.375, "completions/min_length": 641.0, "completions/min_terminated_length": 641.0, "epoch": 0.6157231446289257, "frac_reward_zero_std": 0.0, "grad_norm": 1.867695888341098, "kl": 0.005207061767578125, "learning_rate": 4.479846380308295e-07, "loss": 0.0072, "num_tokens": 134644217.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9523310661315918, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07200560943657443, "rewards/wordcountpos_reward/raw_geo/std": 0.09875055380685799, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1099.125, "completions/mean_terminated_length": 1072.4000244140625, "completions/min_length": 681.0, "completions/min_terminated_length": 681.0, "epoch": 0.6159231846369274, "frac_reward_zero_std": 0.0, "grad_norm": 2.806921667198429, "kl": 0.0195770263671875, "learning_rate": 4.4767861496125814e-07, "loss": 0.021, "num_tokens": 134691667.0, "reward": 0.0, "reward_std": 0.5158624053001404, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19677323878018388, "rewards/wordcountpos_reward/raw_geo/std": 0.13907226599213796, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.107496769977314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1388.5625, "completions/mean_terminated_length": 1372.6429443359375, "completions/min_length": 1233.0, "completions/min_terminated_length": 1233.0, "epoch": 0.616123224644929, "frac_reward_zero_std": 0.0, "grad_norm": 2.3402783202457047, "kl": 0.0136260986328125, "learning_rate": 4.4737264178405855e-07, "loss": -0.0043, "num_tokens": 134744332.0, "reward": 2.9802322387695312e-08, "reward_std": 0.45858219265937805, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09321861159478453, "rewards/wordcountpos_reward/raw_geo/std": 0.14582913056031618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1126.0, "completions/max_terminated_length": 1126.0, "completions/mean_length": 1010.875, "completions/mean_terminated_length": 1010.875, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.6163232646529306, "frac_reward_zero_std": 0.0, "grad_norm": 3.7126427086911193, "kl": 0.01898193359375, "learning_rate": 4.4706671864842505e-07, "loss": -0.031, "num_tokens": 134785242.0, "reward": 0.0, "reward_std": 0.9075813293457031, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11179271402152303, "rewards/wordcountpos_reward/raw_geo/std": 0.07616694062331979, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 982.5625, "completions/mean_terminated_length": 982.5625, "completions/min_length": 700.0, "completions/min_terminated_length": 700.0, "epoch": 0.6165233046609322, "frac_reward_zero_std": 0.0, "grad_norm": 2.3947898648164045, "kl": 0.0089263916015625, "learning_rate": 4.467608457035268e-07, "loss": -0.0353, "num_tokens": 134813699.0, "reward": 0.0, "reward_std": 0.9730143547058105, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1329865887928518, "rewards/wordcountpos_reward/raw_geo/std": 0.05578164620539789, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 988.0, "completions/mean_length": 1190.75, "completions/mean_terminated_length": 881.5, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.6167233446689337, "frac_reward_zero_std": 0.0, "grad_norm": 3.0226814952976357, "kl": 0.01214599609375, "learning_rate": 4.46455023098509e-07, "loss": -0.0098, "num_tokens": 134850207.0, "reward": 0.0, "reward_std": 0.91448974609375, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09168683303622047, "rewards/wordcountpos_reward/raw_geo/std": 0.15941293097198878, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460886, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1217.0625, "completions/mean_terminated_length": 1088.45458984375, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.6169233846769354, "frac_reward_zero_std": 0.0, "grad_norm": 2.86237597618167, "kl": 0.016815185546875, "learning_rate": 4.4614925098249203e-07, "loss": -0.0108, "num_tokens": 134886424.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7869375348091125, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0013370726656478336, "rewards/wordcountpos_reward/raw_geo/std": 0.15542421315824434, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1289.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 1090.4375, "completions/mean_terminated_length": 1090.4375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.617123424684937, "frac_reward_zero_std": 0.0, "grad_norm": 3.7992666196178635, "kl": 0.0201416015625, "learning_rate": 4.4584352950457204e-07, "loss": -0.016, "num_tokens": 134931207.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9879498481750488, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2654547777940785, "rewards/wordcountpos_reward/raw_geo/std": 0.30734157067148266, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1471.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1164.0, "completions/mean_terminated_length": 1164.0, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.6173234646929386, "frac_reward_zero_std": 0.0, "grad_norm": 3.128541976316318, "kl": 0.016998291015625, "learning_rate": 4.455378588138199e-07, "loss": -0.0353, "num_tokens": 134970759.0, "reward": 0.0, "reward_std": 0.6662710905075073, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.013342182267895124, "rewards/wordcountpos_reward/raw_geo/std": 0.03854148466524118, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 977.9375, "completions/mean_terminated_length": 977.9375, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.6175235047009402, "frac_reward_zero_std": 0.0, "grad_norm": 3.5273780710552796, "kl": 0.01715087890625, "learning_rate": 4.452322390592823e-07, "loss": 0.0318, "num_tokens": 135002502.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9901870489120483, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.059424342942386604, "rewards/wordcountpos_reward/raw_geo/std": 0.08954789461450892, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 957.5625, "completions/mean_terminated_length": 957.5625, "completions/min_length": 675.0, "completions/min_terminated_length": 675.0, "epoch": 0.6177235447089418, "frac_reward_zero_std": 0.0, "grad_norm": 3.661449792433229, "kl": 0.017547607421875, "learning_rate": 4.449266703899806e-07, "loss": -0.0546, "num_tokens": 135030375.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0079008340835571, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06108685328624488, "rewards/wordcountpos_reward/raw_geo/std": 0.10794731743357908, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1186.0625, "completions/mean_terminated_length": 1165.1334228515625, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.6179235847169434, "frac_reward_zero_std": 0.0, "grad_norm": 3.2681975333416244, "kl": 0.0211181640625, "learning_rate": 4.4462115295491156e-07, "loss": -0.0003, "num_tokens": 135077320.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9644196629524231, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0480853109552666, "rewards/wordcountpos_reward/raw_geo/std": 0.08789309194537608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1201.5625, "completions/mean_terminated_length": 1201.5625, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.618123624724945, "frac_reward_zero_std": 0.0, "grad_norm": 3.0157263798129903, "kl": 0.013275146484375, "learning_rate": 4.443156869030468e-07, "loss": -0.0321, "num_tokens": 135115417.0, "reward": 0.0, "reward_std": 0.6051969528198242, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03099740495886119, "rewards/wordcountpos_reward/raw_geo/std": 0.11030883317202657, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3090 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1138.125, "completions/mean_terminated_length": 1114.0, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.6183236647329466, "frac_reward_zero_std": 0.0, "grad_norm": 3.338313950713091, "kl": 0.0175018310546875, "learning_rate": 4.44010272383333e-07, "loss": -0.0118, "num_tokens": 135165491.0, "reward": 2.0489096641540527e-08, "reward_std": 0.9716876745223999, "rewards/wordcountpos_reward/mean": 2.0489096641540527e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14545534717569913, "rewards/wordcountpos_reward/raw_geo/std": 0.233406187573755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818417, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1194.5625, "completions/mean_terminated_length": 1150.9285888671875, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.6185237047409482, "frac_reward_zero_std": 0.0, "grad_norm": 2.770049313843344, "kl": 0.010986328125, "learning_rate": 4.4370490954469163e-07, "loss": -0.0163, "num_tokens": 135208484.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9247745275497437, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09929391647209253, "rewards/wordcountpos_reward/raw_geo/std": 0.06451893214222387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1181.3125, "completions/mean_terminated_length": 1181.3125, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.6187237447489498, "frac_reward_zero_std": 0.0, "grad_norm": 3.243426062614555, "kl": 0.01751708984375, "learning_rate": 4.4339959853601927e-07, "loss": 0.0429, "num_tokens": 135244649.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8939130306243896, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11207426085446043, "rewards/wordcountpos_reward/raw_geo/std": 0.34557195627472387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1301.1875, "completions/mean_terminated_length": 1272.7857666015625, "completions/min_length": 1128.0, "completions/min_terminated_length": 1128.0, "epoch": 0.6189237847569514, "frac_reward_zero_std": 0.0, "grad_norm": 3.0023187196187116, "kl": 0.0145111083984375, "learning_rate": 4.4309433950618646e-07, "loss": -0.0087, "num_tokens": 135293612.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8135339021682739, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.3884325880815435, "rewards/wordcountpos_reward/raw_geo/std": 0.274152177678061, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1313.4375, "completions/mean_terminated_length": 1301.0001220703125, "completions/min_length": 1067.0, "completions/min_terminated_length": 1067.0, "epoch": 0.619123824764953, "frac_reward_zero_std": 0.0, "grad_norm": 1.9206190889195087, "kl": 0.0070953369140625, "learning_rate": 4.427891326040393e-07, "loss": 0.0125, "num_tokens": 135333315.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0606616735458374, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01585296785586634, "rewards/wordcountpos_reward/raw_geo/std": 0.07598643251736716, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1086.6875, "completions/mean_terminated_length": 1086.6875, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.6193238647729545, "frac_reward_zero_std": 0.0, "grad_norm": 3.2617030027264375, "kl": 0.0190277099609375, "learning_rate": 4.4248397797839786e-07, "loss": 0.0293, "num_tokens": 135384334.0, "reward": 2.9802322387695312e-08, "reward_std": 1.022139310836792, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13400801332234594, "rewards/wordcountpos_reward/raw_geo/std": 0.1836473036186573, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1361.375, "completions/mean_terminated_length": 1329.3846435546875, "completions/min_length": 1202.0, "completions/min_terminated_length": 1202.0, "epoch": 0.6195239047809562, "frac_reward_zero_std": 0.0, "grad_norm": 2.67000801974029, "kl": 0.0141448974609375, "learning_rate": 4.4217887577805715e-07, "loss": 0.017, "num_tokens": 135426892.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0545480251312256, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.054638622218651595, "rewards/wordcountpos_reward/raw_geo/std": 0.06651190021030697, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1031.0, "completions/max_terminated_length": 1031.0, "completions/mean_length": 931.5, "completions/mean_terminated_length": 931.5, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.6197239447889578, "frac_reward_zero_std": 0.0, "grad_norm": 3.98656353622943, "kl": 0.02325439453125, "learning_rate": 4.418738261517859e-07, "loss": 0.0201, "num_tokens": 135472780.0, "reward": 0.0, "reward_std": 0.7528901696205139, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15838663783280316, "rewards/wordcountpos_reward/raw_geo/std": 0.08174907680193232, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563383, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1086.5, "completions/mean_terminated_length": 1058.933349609375, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.6199239847969594, "frac_reward_zero_std": 0.0, "grad_norm": 2.6855608215759443, "kl": 0.010473251342773438, "learning_rate": 4.415688292483283e-07, "loss": 0.031, "num_tokens": 135519932.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8831608295440674, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08959303678709449, "rewards/wordcountpos_reward/raw_geo/std": 0.07513756425149168, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1227.0, "completions/mean_terminated_length": 1188.0, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.620124024804961, "frac_reward_zero_std": 0.0, "grad_norm": 3.597553140860517, "kl": 0.01983642578125, "learning_rate": 4.412638852164018e-07, "loss": 0.0693, "num_tokens": 135562820.0, "reward": 0.0, "reward_std": 0.6649488210678101, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06099081425552416, "rewards/wordcountpos_reward/raw_geo/std": 0.14938913455219613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298547, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1264.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 1021.625, "completions/mean_terminated_length": 1021.625, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.6203240648129625, "frac_reward_zero_std": 0.0, "grad_norm": 3.6022421538127856, "kl": 0.0162506103515625, "learning_rate": 4.4095899420469864e-07, "loss": -0.0417, "num_tokens": 135612694.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0202072858810425, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04003342608195562, "rewards/wordcountpos_reward/raw_geo/std": 0.056604012582636275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1354.4375, "completions/mean_terminated_length": 1305.916748046875, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.6205241048209642, "frac_reward_zero_std": 0.0, "grad_norm": 3.232426856053663, "kl": 0.020355224609375, "learning_rate": 4.40654156361885e-07, "loss": 0.0206, "num_tokens": 135655245.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9511234760284424, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07208202256038412, "rewards/wordcountpos_reward/raw_geo/std": 0.09116042198436183, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 1049.5625, "completions/mean_terminated_length": 1019.5333862304688, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.6207241448289658, "frac_reward_zero_std": 0.0, "grad_norm": 3.1229433813864405, "kl": 0.017913818359375, "learning_rate": 4.4034937183660136e-07, "loss": 0.0065, "num_tokens": 135689638.0, "reward": 0.0, "reward_std": 0.950972318649292, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007705992868535811, "rewards/wordcountpos_reward/raw_geo/std": 0.08256067928855185, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1260.0, "completions/max_terminated_length": 1260.0, "completions/mean_length": 1061.6875, "completions/mean_terminated_length": 1061.6875, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.6209241848369674, "frac_reward_zero_std": 0.0, "grad_norm": 3.3474342624432007, "kl": 0.018768310546875, "learning_rate": 4.4004464077746196e-07, "loss": -0.0287, "num_tokens": 135729769.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9851071238517761, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13461550698902316, "rewards/wordcountpos_reward/raw_geo/std": 0.1296811097987824, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1245.5625, "completions/mean_terminated_length": 1228.60009765625, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.621124224844969, "frac_reward_zero_std": 0.0, "grad_norm": 3.09267302002351, "kl": 0.01715087890625, "learning_rate": 4.3973996333305485e-07, "loss": -0.0494, "num_tokens": 135774402.0, "reward": 0.0, "reward_std": 0.6963129639625549, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13107627439561195, "rewards/wordcountpos_reward/raw_geo/std": 0.21132208113837636, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952264, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1131.125, "completions/mean_terminated_length": 1078.4285888671875, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.6213242648529705, "frac_reward_zero_std": 0.0, "grad_norm": 3.36274318727129, "kl": 0.0189208984375, "learning_rate": 4.3943533965194247e-07, "loss": -0.0507, "num_tokens": 135826884.0, "reward": 0.0, "reward_std": 0.7631850838661194, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.099982005772191, "rewards/wordcountpos_reward/raw_geo/std": 0.14858643872547078, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1231.1875, "completions/mean_terminated_length": 1022.1111450195312, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.6215243048609722, "frac_reward_zero_std": 0.0, "grad_norm": 2.6138759260834425, "kl": 0.012054443359375, "learning_rate": 4.391307698826604e-07, "loss": -0.0034, "num_tokens": 135869927.0, "reward": -2.2351741790771484e-08, "reward_std": 1.024772047996521, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06818868564520018, "rewards/wordcountpos_reward/raw_geo/std": 0.12658826116950875, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460886, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1306.1875, "completions/mean_terminated_length": 1261.4615478515625, "completions/min_length": 1084.0, "completions/min_terminated_length": 1084.0, "epoch": 0.6217243448689738, "frac_reward_zero_std": 0.0, "grad_norm": 3.307100611856583, "kl": 0.0271148681640625, "learning_rate": 4.3882625417371845e-07, "loss": 0.0107, "num_tokens": 135913546.0, "reward": 0.0, "reward_std": 0.8167073726654053, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11788238151875192, "rewards/wordcountpos_reward/raw_geo/std": 0.08558268743781756, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 1076.1875, "completions/mean_terminated_length": 1076.1875, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.6219243848769754, "frac_reward_zero_std": 0.0, "grad_norm": 3.491354446510289, "kl": 0.0184783935546875, "learning_rate": 4.385217926735998e-07, "loss": -0.0286, "num_tokens": 135956733.0, "reward": 0.0, "reward_std": 0.9316313862800598, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05757622713216049, "rewards/wordcountpos_reward/raw_geo/std": 0.08407955043035165, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1095.3125, "completions/mean_terminated_length": 1095.3125, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.622124424884977, "frac_reward_zero_std": 0.0, "grad_norm": 2.904872089735479, "kl": 0.0147552490234375, "learning_rate": 4.382173855307613e-07, "loss": 0.0116, "num_tokens": 135995410.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8422684073448181, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.028483416497571974, "rewards/wordcountpos_reward/raw_geo/std": 0.13276191788554648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1034.125, "completions/mean_terminated_length": 1034.125, "completions/min_length": 721.0, "completions/min_terminated_length": 721.0, "epoch": 0.6223244648929785, "frac_reward_zero_std": 0.0, "grad_norm": 3.7437342030812024, "kl": 0.01898193359375, "learning_rate": 4.379130328936329e-07, "loss": 0.0232, "num_tokens": 136044460.0, "reward": 0.0, "reward_std": 1.0316739082336426, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07014302355243421, "rewards/wordcountpos_reward/raw_geo/std": 0.14592302334555216, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7125, "rewards/wordcountpos_reward/raw_rule/std": 0.1800720020600813, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 949.3125, "completions/mean_terminated_length": 949.3125, "completions/min_length": 695.0, "completions/min_terminated_length": 695.0, "epoch": 0.6225245049009802, "frac_reward_zero_std": 0.0, "grad_norm": 3.343674037951504, "kl": 0.017608642578125, "learning_rate": 4.37608734910619e-07, "loss": 0.0188, "num_tokens": 136070625.0, "reward": 0.0, "reward_std": 0.8417994976043701, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.022812462220538444, "rewards/wordcountpos_reward/raw_geo/std": 0.1005835413466322, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1377.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1113.9375, "completions/mean_terminated_length": 1113.9375, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.6227245449089818, "frac_reward_zero_std": 0.0, "grad_norm": 2.8988496719941406, "kl": 0.0129241943359375, "learning_rate": 4.373044917300961e-07, "loss": 0.0104, "num_tokens": 136123264.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6958272457122803, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07000060456038397, "rewards/wordcountpos_reward/raw_geo/std": 0.06698662233256576, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1140.75, "completions/mean_terminated_length": 1140.75, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.6229245849169834, "frac_reward_zero_std": 0.0, "grad_norm": 3.5820001214353536, "kl": 0.018341064453125, "learning_rate": 4.3700030350041494e-07, "loss": -0.0153, "num_tokens": 136174684.0, "reward": 0.0, "reward_std": 0.9460127949714661, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07848365587148896, "rewards/wordcountpos_reward/raw_geo/std": 0.05425945835720945, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1329.5, "completions/mean_terminated_length": 1305.1429443359375, "completions/min_length": 1149.0, "completions/min_terminated_length": 1149.0, "epoch": 0.623124624924985, "frac_reward_zero_std": 0.0, "grad_norm": 2.9661618025346796, "kl": 0.019256591796875, "learning_rate": 4.366961703698987e-07, "loss": 0.0112, "num_tokens": 136225228.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9584951400756836, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08410774228481703, "rewards/wordcountpos_reward/raw_geo/std": 0.10370680881577765, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1158.0625, "completions/mean_terminated_length": 1158.0625, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.6233246649329865, "frac_reward_zero_std": 0.0, "grad_norm": 3.758960517978021, "kl": 0.027496337890625, "learning_rate": 4.363920924868446e-07, "loss": 0.071, "num_tokens": 136278397.0, "reward": 0.0, "reward_std": 0.4740045666694641, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06836603038547888, "rewards/wordcountpos_reward/raw_geo/std": 0.0506006502790494, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1753303759784389, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1193.6875, "completions/mean_terminated_length": 1193.6875, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.6235247049409882, "frac_reward_zero_std": 0.0, "grad_norm": 3.195496371356709, "kl": 0.017913818359375, "learning_rate": 4.3608806999952177e-07, "loss": 0.0365, "num_tokens": 136324344.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0362813472747803, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012995440188186842, "rewards/wordcountpos_reward/raw_geo/std": 0.10027948790273193, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1377.0625, "completions/mean_terminated_length": 1219.0, "completions/min_length": 1049.0, "completions/min_terminated_length": 1049.0, "epoch": 0.6237247449489898, "frac_reward_zero_std": 0.0, "grad_norm": 3.2884466909951593, "kl": 0.0185546875, "learning_rate": 4.3578410305617343e-07, "loss": -0.031, "num_tokens": 136372209.0, "reward": 0.0, "reward_std": 0.4567936360836029, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.020366429976601333, "rewards/wordcountpos_reward/raw_geo/std": 0.19186363205546747, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 1.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1065.3125, "completions/mean_terminated_length": 1003.21435546875, "completions/min_length": 717.0, "completions/min_terminated_length": 717.0, "epoch": 0.6239247849569914, "frac_reward_zero_std": 0.0, "grad_norm": 2.999668494835926, "kl": 0.01495361328125, "learning_rate": 4.3548019180501507e-07, "loss": -0.024, "num_tokens": 136406350.0, "reward": 0.0, "reward_std": 1.0689244270324707, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07968441040720298, "rewards/wordcountpos_reward/raw_geo/std": 0.08614548001434684, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15341785110291775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1076.1875, "completions/mean_terminated_length": 1076.1875, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.624124824964993, "frac_reward_zero_std": 0.0, "grad_norm": 2.8376223207478577, "kl": 0.0162506103515625, "learning_rate": 4.3517633639423513e-07, "loss": -0.026, "num_tokens": 136446321.0, "reward": 0.0, "reward_std": 0.7436105608940125, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.011216948733196367, "rewards/wordcountpos_reward/raw_geo/std": 0.09842171726710647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 1243.25, "completions/mean_terminated_length": 1043.5555419921875, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.6243248649729946, "frac_reward_zero_std": 0.0, "grad_norm": 1.6795756302033804, "kl": 0.0067138671875, "learning_rate": 4.34872536971995e-07, "loss": -0.0135, "num_tokens": 136493013.0, "reward": 0.0, "reward_std": 0.6699658632278442, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.020007539447670704, "rewards/wordcountpos_reward/raw_geo/std": 0.03999147719664196, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 1063.1875, "completions/mean_terminated_length": 1034.0667724609375, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.6245249049809962, "frac_reward_zero_std": 0.0, "grad_norm": 3.348727657322225, "kl": 0.018768310546875, "learning_rate": 4.3456879368642875e-07, "loss": -0.0333, "num_tokens": 136536384.0, "reward": 4.470348358154297e-08, "reward_std": 0.9143439531326294, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10314124384097399, "rewards/wordcountpos_reward/raw_geo/std": 0.08986091001696425, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1281.875, "completions/mean_terminated_length": 1267.3333740234375, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.6247249449889978, "frac_reward_zero_std": 0.0, "grad_norm": 2.975535824774135, "kl": 0.0183563232421875, "learning_rate": 4.3426510668564275e-07, "loss": -0.0328, "num_tokens": 136592710.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9043763279914856, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08852240013287122, "rewards/wordcountpos_reward/raw_geo/std": 0.20807872109870024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1514742369000235, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1195.4375, "completions/mean_terminated_length": 1175.1334228515625, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.6249249849969994, "frac_reward_zero_std": 0.0, "grad_norm": 3.187729610824358, "kl": 0.0200958251953125, "learning_rate": 4.3396147611771636e-07, "loss": -0.0138, "num_tokens": 136646181.0, "reward": 0.0, "reward_std": 0.6111184358596802, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06943740880733087, "rewards/wordcountpos_reward/raw_geo/std": 0.25757980888639476, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1005.4375, "completions/mean_terminated_length": 1005.4375, "completions/min_length": 595.0, "completions/min_terminated_length": 595.0, "epoch": 0.625125025005001, "frac_reward_zero_std": 0.0, "grad_norm": 3.5553153337475036, "kl": 0.0158233642578125, "learning_rate": 4.33657902130701e-07, "loss": -0.0289, "num_tokens": 136694956.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9227724075317383, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.031132500030651742, "rewards/wordcountpos_reward/raw_geo/std": 0.14678420505132314, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1138.0, "completions/max_terminated_length": 1138.0, "completions/mean_length": 888.75, "completions/mean_terminated_length": 888.75, "completions/min_length": 721.0, "completions/min_terminated_length": 721.0, "epoch": 0.6253250650130026, "frac_reward_zero_std": 0.0, "grad_norm": 3.7923028866760355, "kl": 0.020660400390625, "learning_rate": 4.33354384872621e-07, "loss": -0.0144, "num_tokens": 136733832.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0171772241592407, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12260950925468672, "rewards/wordcountpos_reward/raw_geo/std": 0.0922128358850012, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 957.125, "completions/mean_terminated_length": 957.125, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.6255251050210042, "frac_reward_zero_std": 0.0, "grad_norm": 2.8301652138591806, "kl": 0.01324462890625, "learning_rate": 4.330509244914725e-07, "loss": -0.0322, "num_tokens": 136764410.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8174680471420288, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10460300525837883, "rewards/wordcountpos_reward/raw_geo/std": 0.05652856844034625, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1254.6875, "completions/mean_terminated_length": 1254.6875, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.6257251450290058, "frac_reward_zero_std": 0.0, "grad_norm": 1.2284090192892878, "kl": 0.0068511962890625, "learning_rate": 4.3274752113522427e-07, "loss": -0.0009, "num_tokens": 136805237.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6742169260978699, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2002184298992174, "rewards/wordcountpos_reward/raw_geo/std": 0.20226164170945674, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1180.5625, "completions/mean_terminated_length": 1180.5625, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.6259251850370074, "frac_reward_zero_std": 0.0, "grad_norm": 2.9525941237087805, "kl": 0.0201263427734375, "learning_rate": 4.324441749518172e-07, "loss": -0.0143, "num_tokens": 136855982.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7303072214126587, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19069953718727026, "rewards/wordcountpos_reward/raw_geo/std": 0.23197532171415758, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1213.0, "completions/mean_length": 1222.75, "completions/mean_terminated_length": 1007.1111450195312, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.626125225045009, "frac_reward_zero_std": 0.0, "grad_norm": 3.147024137519835, "kl": 0.0204315185546875, "learning_rate": 4.3214088608916434e-07, "loss": -0.0192, "num_tokens": 136911946.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0684173107147217, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12587723418465416, "rewards/wordcountpos_reward/raw_geo/std": 0.09091793305415105, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1145.6875, "completions/mean_terminated_length": 1145.6875, "completions/min_length": 973.0, "completions/min_terminated_length": 973.0, "epoch": 0.6263252650530106, "frac_reward_zero_std": 0.0, "grad_norm": 3.1083863482148293, "kl": 0.01910400390625, "learning_rate": 4.3183765469515046e-07, "loss": 0.0068, "num_tokens": 136958197.0, "reward": 0.0, "reward_std": 0.9835408926010132, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04601649322008153, "rewards/wordcountpos_reward/raw_geo/std": 0.08231569206766891, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1251.75, "completions/mean_terminated_length": 1058.6666259765625, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.6265253050610122, "frac_reward_zero_std": 0.0, "grad_norm": 3.143598600776574, "kl": 0.017425537109375, "learning_rate": 4.3153448091763277e-07, "loss": -0.0245, "num_tokens": 137002809.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8519697189331055, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15945093524353068, "rewards/wordcountpos_reward/raw_geo/std": 0.07959059285060198, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1278.0, "completions/max_terminated_length": 1278.0, "completions/mean_length": 1118.4375, "completions/mean_terminated_length": 1118.4375, "completions/min_length": 1015.0, "completions/min_terminated_length": 1015.0, "epoch": 0.6267253450690138, "frac_reward_zero_std": 0.0, "grad_norm": 3.8590773194277226, "kl": 0.020050048828125, "learning_rate": 4.3123136490444e-07, "loss": -0.0196, "num_tokens": 137042256.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7092183828353882, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.044022607896974764, "rewards/wordcountpos_reward/raw_geo/std": 0.19554817650767384, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1324.5, "completions/mean_terminated_length": 1324.5, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.6269253850770155, "frac_reward_zero_std": 0.0, "grad_norm": 2.2524194329875793, "kl": 0.01092529296875, "learning_rate": 4.309283068033733e-07, "loss": -0.0308, "num_tokens": 137092544.0, "reward": 0.0, "reward_std": 0.531428873538971, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07982228281251035, "rewards/wordcountpos_reward/raw_geo/std": 0.048819867283929536, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.1413558682244267, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 989.375, "completions/mean_terminated_length": 989.375, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.627125425085017, "frac_reward_zero_std": 0.0, "grad_norm": 3.3145214379010537, "kl": 0.0135498046875, "learning_rate": 4.3062530676220467e-07, "loss": -0.0341, "num_tokens": 137134878.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9505296349525452, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007778918375465526, "rewards/wordcountpos_reward/raw_geo/std": 0.25459101991743477, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1048.0625, "completions/mean_terminated_length": 1048.0625, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.6273254650930186, "frac_reward_zero_std": 0.0, "grad_norm": 3.6138123395313606, "kl": 0.02239990234375, "learning_rate": 4.3032236492867867e-07, "loss": -0.0082, "num_tokens": 137183735.0, "reward": 7.450580596923828e-09, "reward_std": 1.0199977159500122, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.02106520379901223, "rewards/wordcountpos_reward/raw_geo/std": 0.30324278528287935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1167460047694551, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1238.75, "completions/mean_terminated_length": 1201.4285888671875, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.6275255051010202, "frac_reward_zero_std": 0.0, "grad_norm": 2.8842742420861502, "kl": 0.014892578125, "learning_rate": 4.3001948145051084e-07, "loss": 0.0227, "num_tokens": 137215667.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9994112253189087, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013582581776973367, "rewards/wordcountpos_reward/raw_geo/std": 0.03806874756099391, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1111.125, "completions/mean_terminated_length": 1111.125, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.6277255451090218, "frac_reward_zero_std": 0.0, "grad_norm": 2.9089971791555302, "kl": 0.0136260986328125, "learning_rate": 4.2971665647538867e-07, "loss": -0.022, "num_tokens": 137260333.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7089630365371704, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07684923263423817, "rewards/wordcountpos_reward/raw_geo/std": 0.04332587495660774, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11547005383792516, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1209.6875, "completions/mean_terminated_length": 1142.6923828125, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.6279255851170235, "frac_reward_zero_std": 0.0, "grad_norm": 2.8621422081394217, "kl": 0.0117645263671875, "learning_rate": 4.294138901509711e-07, "loss": -0.0093, "num_tokens": 137296448.0, "reward": 0.0, "reward_std": 1.0030618906021118, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11242980379027251, "rewards/wordcountpos_reward/raw_geo/std": 0.05486865219390619, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1075.8125, "completions/mean_terminated_length": 1075.8125, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.628125625125025, "frac_reward_zero_std": 0.0, "grad_norm": 3.219022937471707, "kl": 0.02117919921875, "learning_rate": 4.2911118262488835e-07, "loss": 0.015, "num_tokens": 137340949.0, "reward": 0.0, "reward_std": 0.7940762042999268, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06144418821050447, "rewards/wordcountpos_reward/raw_geo/std": 0.14026446971736195, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13977495139343474, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1259.375, "completions/mean_terminated_length": 1115.0, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.6283256651330266, "frac_reward_zero_std": 0.0, "grad_norm": 2.7511882803185217, "kl": 0.01361083984375, "learning_rate": 4.288085340447416e-07, "loss": 0.0182, "num_tokens": 137383387.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5850831270217896, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09145940782547742, "rewards/wordcountpos_reward/raw_geo/std": 0.22238292570687454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1287.125, "completions/mean_terminated_length": 1216.166748046875, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.6285257051410282, "frac_reward_zero_std": 0.0, "grad_norm": 2.519086427899234, "kl": 0.015106201171875, "learning_rate": 4.285059445581043e-07, "loss": -0.0267, "num_tokens": 137435333.0, "reward": 0.0, "reward_std": 0.7257482409477234, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17555768718538414, "rewards/wordcountpos_reward/raw_geo/std": 0.11882335419206026, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1097.375, "completions/mean_terminated_length": 1097.375, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.6287257451490298, "frac_reward_zero_std": 0.0, "grad_norm": 2.9006803274747037, "kl": 0.01678466796875, "learning_rate": 4.282034143125198e-07, "loss": 0.0158, "num_tokens": 137475595.0, "reward": 0.0, "reward_std": 0.9892991781234741, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.016728898303048387, "rewards/wordcountpos_reward/raw_geo/std": 0.06106379554187353, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.13214750456578045, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1206.75, "completions/mean_terminated_length": 1187.2000732421875, "completions/min_length": 981.0, "completions/min_terminated_length": 981.0, "epoch": 0.6289257851570315, "frac_reward_zero_std": 0.0, "grad_norm": 2.996347965319191, "kl": 0.01446533203125, "learning_rate": 4.2790094345550387e-07, "loss": 0.0206, "num_tokens": 137526207.0, "reward": 5.960464477539063e-08, "reward_std": 0.4127252697944641, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04037583197329799, "rewards/wordcountpos_reward/raw_geo/std": 0.1877056538249397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1306.25, "completions/mean_terminated_length": 1306.25, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.629125825165033, "frac_reward_zero_std": 0.0, "grad_norm": 2.2569802967962986, "kl": 0.010223388671875, "learning_rate": 4.275985321345421e-07, "loss": 0.0256, "num_tokens": 137568091.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0411725044250488, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.005346235930101662, "rewards/wordcountpos_reward/raw_geo/std": 0.049994529279100926, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1236.1875, "completions/mean_terminated_length": 1175.3077392578125, "completions/min_length": 1012.0, "completions/min_terminated_length": 1012.0, "epoch": 0.6293258651730346, "frac_reward_zero_std": 0.0, "grad_norm": 3.492931333583683, "kl": 0.01934814453125, "learning_rate": 4.2729618049709207e-07, "loss": 0.0597, "num_tokens": 137610174.0, "reward": 0.0, "reward_std": 0.6825693845748901, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2792611313366085, "rewards/wordcountpos_reward/raw_geo/std": 0.10788554497448512, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13924399049470285, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1251.9375, "completions/mean_terminated_length": 1169.25, "completions/min_length": 1040.0, "completions/min_terminated_length": 1040.0, "epoch": 0.6295259051810362, "frac_reward_zero_std": 0.0, "grad_norm": 2.4374236370785645, "kl": 0.0146484375, "learning_rate": 4.269938886905815e-07, "loss": -0.021, "num_tokens": 137664029.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7416058778762817, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08479620884729971, "rewards/wordcountpos_reward/raw_geo/std": 0.16927226328402617, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1175.875, "completions/mean_terminated_length": 1154.2667236328125, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.6297259451890378, "frac_reward_zero_std": 0.0, "grad_norm": 3.4138221100882706, "kl": 0.017852783203125, "learning_rate": 4.266916568624094e-07, "loss": 0.0323, "num_tokens": 137706899.0, "reward": 3.3527612686157227e-08, "reward_std": 1.0138828754425049, "rewards/wordcountpos_reward/mean": 3.3527612686157227e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.27775027149549436, "rewards/wordcountpos_reward/raw_geo/std": 0.14843746846792213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1190.5, "completions/mean_terminated_length": 1190.5, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.6299259851970395, "frac_reward_zero_std": 0.0, "grad_norm": 3.5831411150308767, "kl": 0.02252197265625, "learning_rate": 4.263894851599453e-07, "loss": 0.0103, "num_tokens": 137754923.0, "reward": 0.0, "reward_std": 0.8022862076759338, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12509126988051933, "rewards/wordcountpos_reward/raw_geo/std": 0.21292112977629343, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1285.75, "completions/mean_terminated_length": 1236.3077392578125, "completions/min_length": 1077.0, "completions/min_terminated_length": 1077.0, "epoch": 0.630126025205041, "frac_reward_zero_std": 0.0, "grad_norm": 2.8767192740448677, "kl": 0.0178680419921875, "learning_rate": 4.260873737305296e-07, "loss": -0.0014, "num_tokens": 137813039.0, "reward": 0.0, "reward_std": 0.6470840573310852, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1129755098715549, "rewards/wordcountpos_reward/raw_geo/std": 0.12523643267316018, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1131.0625, "completions/mean_terminated_length": 1131.0625, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.6303260652130426, "frac_reward_zero_std": 0.0, "grad_norm": 3.054272729691205, "kl": 0.0172119140625, "learning_rate": 4.2578532272147315e-07, "loss": 0.0008, "num_tokens": 137864680.0, "reward": -5.960464477539063e-08, "reward_std": 0.6156487464904785, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07433488387835024, "rewards/wordcountpos_reward/raw_geo/std": 0.17582227241909348, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.15006171569897006, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1095.0, "completions/max_terminated_length": 1095.0, "completions/mean_length": 911.0, "completions/mean_terminated_length": 911.0, "completions/min_length": 712.0, "completions/min_terminated_length": 712.0, "epoch": 0.6305261052210442, "frac_reward_zero_std": 0.0, "grad_norm": 3.4444005092117433, "kl": 0.0147247314453125, "learning_rate": 4.254833322800574e-07, "loss": 0.0082, "num_tokens": 137911944.0, "reward": -5.960464477539063e-08, "reward_std": 0.724614143371582, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1829866732192168, "rewards/wordcountpos_reward/raw_geo/std": 0.19025601721986016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1125.0, "completions/mean_terminated_length": 1125.0, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.6307261452290458, "frac_reward_zero_std": 0.0, "grad_norm": 3.2419209860956366, "kl": 0.0154571533203125, "learning_rate": 4.251814025535342e-07, "loss": 0.0042, "num_tokens": 137951776.0, "reward": 0.0, "reward_std": 0.695215106010437, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.059826934748479846, "rewards/wordcountpos_reward/raw_geo/std": 0.0759976994374177, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1158.0, "completions/mean_terminated_length": 1158.0, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.6309261852370474, "frac_reward_zero_std": 0.0, "grad_norm": 3.3547164806463567, "kl": 0.01763916015625, "learning_rate": 4.2487953368912587e-07, "loss": -0.0177, "num_tokens": 137988544.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9726671576499939, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1237149433931696, "rewards/wordcountpos_reward/raw_geo/std": 0.09302693984489648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1180.0625, "completions/mean_terminated_length": 1180.0625, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.631126225245049, "frac_reward_zero_std": 0.0, "grad_norm": 1.8164321349595387, "kl": 0.006137847900390625, "learning_rate": 4.2457772583402497e-07, "loss": -0.0301, "num_tokens": 138037417.0, "reward": 0.0, "reward_std": 0.6919116973876953, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15638636251601382, "rewards/wordcountpos_reward/raw_geo/std": 0.13282673232780468, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1248.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 1049.25, "completions/mean_terminated_length": 1049.25, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.6313262652530506, "frac_reward_zero_std": 0.0, "grad_norm": 3.4075598191805594, "kl": 0.0157012939453125, "learning_rate": 4.242759791353947e-07, "loss": -0.02, "num_tokens": 138079677.0, "reward": 0.0, "reward_std": 0.739166259765625, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.021039962651943106, "rewards/wordcountpos_reward/raw_geo/std": 0.12368090898064193, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.077817450199525, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1211.375, "completions/mean_terminated_length": 1211.375, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.6315263052610522, "frac_reward_zero_std": 0.0, "grad_norm": 2.397414599803794, "kl": 0.0103607177734375, "learning_rate": 4.2397429374036774e-07, "loss": -0.0167, "num_tokens": 138128131.0, "reward": 2.9802322387695312e-08, "reward_std": 0.844476580619812, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03859910824438735, "rewards/wordcountpos_reward/raw_geo/std": 0.08250346497483654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 1110.625, "completions/mean_terminated_length": 1110.625, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.6317263452690538, "frac_reward_zero_std": 0.0, "grad_norm": 3.995371645268981, "kl": 0.021697998046875, "learning_rate": 4.236726697960477e-07, "loss": -0.0028, "num_tokens": 138179613.0, "reward": 7.450580596923828e-09, "reward_std": 1.0232713222503662, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07647790185267496, "rewards/wordcountpos_reward/raw_geo/std": 0.09964225763365624, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15049301694147857, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1204.0625, "completions/mean_terminated_length": 1184.3333740234375, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.6319263852770554, "frac_reward_zero_std": 0.0, "grad_norm": 2.9361891729733274, "kl": 0.0195159912109375, "learning_rate": 4.233711074495073e-07, "loss": 0.0145, "num_tokens": 138229078.0, "reward": -5.960464477539063e-08, "reward_std": 0.6829382181167603, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14713966722240807, "rewards/wordcountpos_reward/raw_geo/std": 0.12739990276118485, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1290.0, "completions/mean_length": 1107.75, "completions/mean_terminated_length": 1081.60009765625, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.632126425285057, "frac_reward_zero_std": 0.0, "grad_norm": 3.5870329823610634, "kl": 0.020782470703125, "learning_rate": 4.230696068477898e-07, "loss": -0.0418, "num_tokens": 138265714.0, "reward": 0.0, "reward_std": 1.0351190567016602, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.009153498549220604, "rewards/wordcountpos_reward/raw_geo/std": 0.08613567607672429, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16771890063326086, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1001.4375, "completions/mean_terminated_length": 1001.4375, "completions/min_length": 538.0, "completions/min_terminated_length": 538.0, "epoch": 0.6323264652930586, "frac_reward_zero_std": 0.0, "grad_norm": 3.1329639877721007, "kl": 0.0137176513671875, "learning_rate": 4.227681681379087e-07, "loss": -0.0025, "num_tokens": 138302369.0, "reward": -2.60770320892334e-08, "reward_std": 0.9084837436676025, "rewards/wordcountpos_reward/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.023782785398485065, "rewards/wordcountpos_reward/raw_geo/std": 0.058951374116395734, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1030.0, "completions/max_terminated_length": 1030.0, "completions/mean_length": 858.1875, "completions/mean_terminated_length": 858.1875, "completions/min_length": 661.0, "completions/min_terminated_length": 661.0, "epoch": 0.6325265053010602, "frac_reward_zero_std": 0.0, "grad_norm": 3.9911646022568674, "kl": 0.023529052734375, "learning_rate": 4.224667914668463e-07, "loss": -0.0236, "num_tokens": 138339092.0, "reward": -3.725290298461914e-08, "reward_std": 1.0415786504745483, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.006611182965728974, "rewards/wordcountpos_reward/raw_geo/std": 0.10815917983660134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1299572579307862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1147.5, "completions/mean_terminated_length": 1124.0, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.6327265453090618, "frac_reward_zero_std": 0.0, "grad_norm": 3.0753079091532136, "kl": 0.019744873046875, "learning_rate": 4.221654769815557e-07, "loss": 0.0177, "num_tokens": 138370140.0, "reward": 0.0, "reward_std": 0.3636721968650818, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05834275924806642, "rewards/wordcountpos_reward/raw_geo/std": 0.09070264270096025, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1185.75, "completions/mean_terminated_length": 1164.800048828125, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.6329265853170634, "frac_reward_zero_std": 0.0, "grad_norm": 2.128902042165283, "kl": 0.0075531005859375, "learning_rate": 4.2186422482895876e-07, "loss": -0.0096, "num_tokens": 138421576.0, "reward": -7.450580596923828e-09, "reward_std": 1.059860348701477, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.014303152405624322, "rewards/wordcountpos_reward/raw_geo/std": 0.06917697483149168, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1096.1875, "completions/mean_terminated_length": 1003.0000610351562, "completions/min_length": 669.0, "completions/min_terminated_length": 669.0, "epoch": 0.633126625325065, "frac_reward_zero_std": 0.0, "grad_norm": 3.4154706759822644, "kl": 0.0173492431640625, "learning_rate": 4.2156303515594784e-07, "loss": 0.0676, "num_tokens": 138474923.0, "reward": 2.2351741790771484e-08, "reward_std": 1.044196605682373, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07080667838718353, "rewards/wordcountpos_reward/raw_geo/std": 0.3048872346462518, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1269.9375, "completions/mean_terminated_length": 1254.60009765625, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.6333266653330666, "frac_reward_zero_std": 0.0, "grad_norm": 2.827212656737832, "kl": 0.0156707763671875, "learning_rate": 4.21261908109384e-07, "loss": -0.0191, "num_tokens": 138525746.0, "reward": 0.0, "reward_std": 0.845740795135498, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10135150297324756, "rewards/wordcountpos_reward/raw_geo/std": 0.10622111969833668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1156.625, "completions/mean_terminated_length": 1077.3846435546875, "completions/min_length": 606.0, "completions/min_terminated_length": 606.0, "epoch": 0.6335267053410683, "frac_reward_zero_std": 0.0, "grad_norm": 3.486162378322106, "kl": 0.016632080078125, "learning_rate": 4.209608438360985e-07, "loss": -0.0306, "num_tokens": 138571772.0, "reward": 0.0, "reward_std": 0.8417177796363831, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.038805254526555526, "rewards/wordcountpos_reward/raw_geo/std": 0.3632715837888754, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1365582225578092, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 1155.3125, "completions/mean_terminated_length": 1155.3125, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.6337267453490698, "frac_reward_zero_std": 0.0, "grad_norm": 3.0337971051436163, "kl": 0.01629638671875, "learning_rate": 4.2065984248289144e-07, "loss": 0.0175, "num_tokens": 138606545.0, "reward": 0.0, "reward_std": 0.8297116756439209, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14323674823104918, "rewards/wordcountpos_reward/raw_geo/std": 0.22189932544182553, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1195.125, "completions/mean_terminated_length": 1174.800048828125, "completions/min_length": 1038.0, "completions/min_terminated_length": 1038.0, "epoch": 0.6339267853570714, "frac_reward_zero_std": 0.0, "grad_norm": 2.456368530226249, "kl": 0.0115966796875, "learning_rate": 4.2035890419653264e-07, "loss": -0.0237, "num_tokens": 138644203.0, "reward": 0.0, "reward_std": 1.0492595434188843, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0816374074074803, "rewards/wordcountpos_reward/raw_geo/std": 0.10116499064537156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1224.0, "completions/mean_terminated_length": 1184.571533203125, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.634126825365073, "frac_reward_zero_std": 0.0, "grad_norm": 3.86451104258521, "kl": 0.032501220703125, "learning_rate": 4.2005802912376063e-07, "loss": 0.0219, "num_tokens": 138698635.0, "reward": 0.0, "reward_std": 1.0653672218322754, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.20006598294262912, "rewards/wordcountpos_reward/raw_geo/std": 0.348461386774674, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1157.0, "completions/max_terminated_length": 1157.0, "completions/mean_length": 1020.0625, "completions/mean_terminated_length": 1020.0625, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.6343268653730746, "frac_reward_zero_std": 0.0, "grad_norm": 3.0880004268260985, "kl": 0.01446533203125, "learning_rate": 4.197572174112839e-07, "loss": 0.0164, "num_tokens": 138729364.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0490856170654297, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13933407333900025, "rewards/wordcountpos_reward/raw_geo/std": 0.07462147037502566, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1209.0, "completions/max_terminated_length": 1209.0, "completions/mean_length": 1076.3125, "completions/mean_terminated_length": 1076.3125, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.6345269053810763, "frac_reward_zero_std": 0.0, "grad_norm": 3.3956573504591967, "kl": 0.016510009765625, "learning_rate": 4.1945646920577927e-07, "loss": -0.0515, "num_tokens": 138774185.0, "reward": 0.0, "reward_std": 1.0545827150344849, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.00839357872436973, "rewards/wordcountpos_reward/raw_geo/std": 0.038098341141220356, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1218.125, "completions/mean_terminated_length": 1177.857177734375, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.6347269453890778, "frac_reward_zero_std": 0.0, "grad_norm": 3.1168741925217693, "kl": 0.016845703125, "learning_rate": 4.1915578465389314e-07, "loss": -0.068, "num_tokens": 138821299.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8501385450363159, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11426142188972964, "rewards/wordcountpos_reward/raw_geo/std": 0.10068884630258255, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746353, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1233.875, "completions/mean_terminated_length": 1216.1334228515625, "completions/min_length": 1038.0, "completions/min_terminated_length": 1038.0, "epoch": 0.6349269853970794, "frac_reward_zero_std": 0.0, "grad_norm": 2.9251251264277136, "kl": 0.01751708984375, "learning_rate": 4.188551639022403e-07, "loss": 0.0033, "num_tokens": 138868865.0, "reward": 0.0, "reward_std": 0.722690224647522, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1502375321449283, "rewards/wordcountpos_reward/raw_geo/std": 0.18092361511165608, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1105.0625, "completions/mean_terminated_length": 1048.6429443359375, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.635127025405081, "frac_reward_zero_std": 0.0, "grad_norm": 3.21727951540988, "kl": 0.01629638671875, "learning_rate": 4.1855460709740533e-07, "loss": 0.0217, "num_tokens": 138913482.0, "reward": 0.0, "reward_std": 0.7833198308944702, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07926107278496829, "rewards/wordcountpos_reward/raw_geo/std": 0.1087448109412782, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1116.0, "completions/mean_terminated_length": 988.0, "completions/min_length": 667.0, "completions/min_terminated_length": 667.0, "epoch": 0.6353270654130826, "frac_reward_zero_std": 0.0, "grad_norm": 3.580310596955016, "kl": 0.0178375244140625, "learning_rate": 4.182541143859406e-07, "loss": -0.0224, "num_tokens": 138950890.0, "reward": 0.0, "reward_std": 0.5500986576080322, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006112918066994897, "rewards/wordcountpos_reward/raw_geo/std": 0.08895261254083829, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027816, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1356.0, "completions/mean_terminated_length": 1346.4000244140625, "completions/min_length": 1180.0, "completions/min_terminated_length": 1180.0, "epoch": 0.6355271054210843, "frac_reward_zero_std": 0.0, "grad_norm": 2.129713188230861, "kl": 0.0120849609375, "learning_rate": 4.179536859143682e-07, "loss": -0.0075, "num_tokens": 139003090.0, "reward": 0.0, "reward_std": 0.6718524694442749, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.030913741711836334, "rewards/wordcountpos_reward/raw_geo/std": 0.32101218933713555, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1259.0, "completions/mean_terminated_length": 1242.933349609375, "completions/min_length": 1056.0, "completions/min_terminated_length": 1056.0, "epoch": 0.6357271454290858, "frac_reward_zero_std": 0.0, "grad_norm": 2.9711060434549483, "kl": 0.0169219970703125, "learning_rate": 4.176533218291779e-07, "loss": -0.0075, "num_tokens": 139050002.0, "reward": 2.9802322387695312e-08, "reward_std": 0.737159252166748, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.028973697039161004, "rewards/wordcountpos_reward/raw_geo/std": 0.17752584112020553, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1041.0, "completions/mean_terminated_length": 1041.0, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.6359271854370874, "frac_reward_zero_std": 0.0, "grad_norm": 3.7032945713847933, "kl": 0.0201416015625, "learning_rate": 4.17353022276829e-07, "loss": 0.0118, "num_tokens": 139087674.0, "reward": 0.0, "reward_std": 0.7623980045318604, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0023477669421095686, "rewards/wordcountpos_reward/raw_geo/std": 0.10983846252860491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090449, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 1015.3125, "completions/mean_terminated_length": 1015.3125, "completions/min_length": 718.0, "completions/min_terminated_length": 718.0, "epoch": 0.636127225445089, "frac_reward_zero_std": 0.0, "grad_norm": 2.6396989217289977, "kl": 0.0126800537109375, "learning_rate": 4.1705278740374873e-07, "loss": -0.0151, "num_tokens": 139133551.0, "reward": -2.2351741790771484e-08, "reward_std": 1.065057396888733, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.017435515317303918, "rewards/wordcountpos_reward/raw_geo/std": 0.1654714880397331, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460883, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1095.4375, "completions/mean_terminated_length": 1095.4375, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.6363272654530906, "frac_reward_zero_std": 0.0, "grad_norm": 3.3032232783834723, "kl": 0.018951416015625, "learning_rate": 4.1675261735633314e-07, "loss": 0.0193, "num_tokens": 139164174.0, "reward": 0.0, "reward_std": 0.6040237545967102, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.003129325822155848, "rewards/wordcountpos_reward/raw_geo/std": 0.07845434241887701, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1245.0, "completions/max_terminated_length": 1245.0, "completions/mean_length": 1102.1875, "completions/mean_terminated_length": 1102.1875, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.6365273054610923, "frac_reward_zero_std": 0.0, "grad_norm": 3.5168791325543016, "kl": 0.022552490234375, "learning_rate": 4.16452512280946e-07, "loss": 0.0148, "num_tokens": 139207065.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9177701473236084, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07420985684212461, "rewards/wordcountpos_reward/raw_geo/std": 0.06202499787923471, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1164.5, "completions/mean_terminated_length": 963.2000122070312, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.6367273454690938, "frac_reward_zero_std": 0.0, "grad_norm": 3.5423927945861604, "kl": 0.016082763671875, "learning_rate": 4.161524723239208e-07, "loss": -0.0563, "num_tokens": 139249073.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9408705234527588, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04337885134731523, "rewards/wordcountpos_reward/raw_geo/std": 0.12697802587873494, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10610965676722954, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1263.5, "completions/mean_terminated_length": 1263.5, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.6369273854770954, "frac_reward_zero_std": 0.0, "grad_norm": 2.9513066684020273, "kl": 0.0170135498046875, "learning_rate": 4.158524976315575e-07, "loss": -0.0061, "num_tokens": 139300697.0, "reward": 0.0, "reward_std": 0.8119181394577026, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04338759059539542, "rewards/wordcountpos_reward/raw_geo/std": 0.036610323160661554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1307.0, "completions/max_terminated_length": 1307.0, "completions/mean_length": 1096.25, "completions/mean_terminated_length": 1096.25, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.637127425485097, "frac_reward_zero_std": 0.0, "grad_norm": 3.211517615381658, "kl": 0.0182647705078125, "learning_rate": 4.1555258835012584e-07, "loss": 0.031, "num_tokens": 139341429.0, "reward": -1.4901161193847656e-08, "reward_std": 0.975369930267334, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08230563692551206, "rewards/wordcountpos_reward/raw_geo/std": 0.07129886665390216, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1161.4375, "completions/mean_terminated_length": 1161.4375, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.6373274654930986, "frac_reward_zero_std": 0.0, "grad_norm": 3.02713928967306, "kl": 0.01287841796875, "learning_rate": 4.1525274462586234e-07, "loss": 0.0364, "num_tokens": 139387588.0, "reward": 0.0, "reward_std": 0.5174393057823181, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15579264809421434, "rewards/wordcountpos_reward/raw_geo/std": 0.11626899711317917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1262.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 1066.875, "completions/mean_terminated_length": 1066.875, "completions/min_length": 769.0, "completions/min_terminated_length": 769.0, "epoch": 0.6375275055011003, "frac_reward_zero_std": 0.0, "grad_norm": 3.05096966698885, "kl": 0.0200347900390625, "learning_rate": 4.149529666049726e-07, "loss": -0.0141, "num_tokens": 139434634.0, "reward": 0.0, "reward_std": 0.5078494548797607, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10066545814301987, "rewards/wordcountpos_reward/raw_geo/std": 0.15769856860539258, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043478, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1206.0, "completions/mean_length": 1034.3125, "completions/mean_terminated_length": 967.7857666015625, "completions/min_length": 734.0, "completions/min_terminated_length": 734.0, "epoch": 0.6377275455091018, "frac_reward_zero_std": 0.0, "grad_norm": 3.3253126746939916, "kl": 0.031646728515625, "learning_rate": 4.1465325443362943e-07, "loss": 0.0212, "num_tokens": 139474319.0, "reward": 0.0, "reward_std": 0.9544169902801514, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.005409415610430434, "rewards/wordcountpos_reward/raw_geo/std": 0.05428595860087591, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.18678567634829202, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 1118.75, "completions/mean_terminated_length": 1118.75, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.6379275855171034, "frac_reward_zero_std": 0.0, "grad_norm": 3.3925061373139878, "kl": 0.02020263671875, "learning_rate": 4.1435360825797417e-07, "loss": -0.0206, "num_tokens": 139512595.0, "reward": -7.450580596923828e-09, "reward_std": 1.0526578426361084, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.05615197481278546, "rewards/wordcountpos_reward/raw_geo/std": 0.15483486400340943, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1016.4375, "completions/mean_terminated_length": 984.2000732421875, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.638127625525105, "frac_reward_zero_std": 0.0, "grad_norm": 3.283593165918999, "kl": 0.0162811279296875, "learning_rate": 4.1405402822411526e-07, "loss": -0.0466, "num_tokens": 139551706.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5867779850959778, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1085011422198069, "rewards/wordcountpos_reward/raw_geo/std": 0.21715580872033394, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1114.375, "completions/mean_terminated_length": 1088.666748046875, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.6383276655331066, "frac_reward_zero_std": 0.0, "grad_norm": 2.849278695022208, "kl": 0.01214599609375, "learning_rate": 4.137545144781297e-07, "loss": 0.0013, "num_tokens": 139602608.0, "reward": 0.0, "reward_std": 0.998940110206604, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12864859104667103, "rewards/wordcountpos_reward/raw_geo/std": 0.05680702731847926, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1267.0625, "completions/mean_terminated_length": 1267.0625, "completions/min_length": 1085.0, "completions/min_terminated_length": 1085.0, "epoch": 0.6385277055411083, "frac_reward_zero_std": 0.0, "grad_norm": 2.530518771721471, "kl": 0.0113372802734375, "learning_rate": 4.134550671660615e-07, "loss": -0.0306, "num_tokens": 139652025.0, "reward": 0.0, "reward_std": 0.8752542734146118, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.036969283538859324, "rewards/wordcountpos_reward/raw_geo/std": 0.14478784575908646, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1216.0, "completions/max_terminated_length": 1216.0, "completions/mean_length": 1069.625, "completions/mean_terminated_length": 1069.625, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.6387277455491098, "frac_reward_zero_std": 0.0, "grad_norm": 2.741902809292069, "kl": 0.0144195556640625, "learning_rate": 4.1315568643392264e-07, "loss": 0.0005, "num_tokens": 139686283.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9043872356414795, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006945997286661827, "rewards/wordcountpos_reward/raw_geo/std": 0.02422212250307252, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1171.6875, "completions/mean_terminated_length": 1171.6875, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.6389277855571114, "frac_reward_zero_std": 0.0, "grad_norm": 3.160894494138013, "kl": 0.014984130859375, "learning_rate": 4.128563724276923e-07, "loss": -0.0074, "num_tokens": 139725822.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9513463973999023, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1196518492142181, "rewards/wordcountpos_reward/raw_geo/std": 0.11157121081455079, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 1019.5, "completions/mean_terminated_length": 1019.5, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.639127825565113, "frac_reward_zero_std": 0.0, "grad_norm": 3.7358042748919034, "kl": 0.021942138671875, "learning_rate": 4.1255712529331775e-07, "loss": -0.0438, "num_tokens": 139776966.0, "reward": -2.9802322387695312e-08, "reward_std": 0.657949686050415, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03572701480678395, "rewards/wordcountpos_reward/raw_geo/std": 0.18726954392401096, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1278.0, "completions/max_terminated_length": 1278.0, "completions/mean_length": 1135.0625, "completions/mean_terminated_length": 1135.0625, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.6393278655731146, "frac_reward_zero_std": 0.0, "grad_norm": 3.263398625082951, "kl": 0.019500732421875, "learning_rate": 4.1225794517671285e-07, "loss": 0.0127, "num_tokens": 139812951.0, "reward": 0.0, "reward_std": 1.0295389890670776, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04856290146683472, "rewards/wordcountpos_reward/raw_geo/std": 0.07026541784881, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1134.9375, "completions/mean_terminated_length": 1134.9375, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.6395279055811163, "frac_reward_zero_std": 0.0, "grad_norm": 3.475602709186293, "kl": 0.01788330078125, "learning_rate": 4.1195883222375947e-07, "loss": -0.0135, "num_tokens": 139855270.0, "reward": 0.0, "reward_std": 0.4045112431049347, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0015846449160216956, "rewards/wordcountpos_reward/raw_geo/std": 0.12092821133282317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.18678567634829202, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1200.625, "completions/mean_terminated_length": 1157.857177734375, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.6397279455891178, "frac_reward_zero_std": 0.0, "grad_norm": 2.6619543921754105, "kl": 0.013580322265625, "learning_rate": 4.1165978658030633e-07, "loss": -0.0072, "num_tokens": 139906680.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9377762079238892, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010723961559330962, "rewards/wordcountpos_reward/raw_geo/std": 0.07956795800580105, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1214.0, "completions/max_terminated_length": 1214.0, "completions/mean_length": 886.0625, "completions/mean_terminated_length": 886.0625, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.6399279855971194, "frac_reward_zero_std": 0.0, "grad_norm": 3.1760466239190377, "kl": 0.0136871337890625, "learning_rate": 4.1136080839216934e-07, "loss": 0.0155, "num_tokens": 139941601.0, "reward": 0.0, "reward_std": 0.630158543586731, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02600321765250397, "rewards/wordcountpos_reward/raw_geo/std": 0.08412538195031304, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1145.0625, "completions/mean_terminated_length": 1145.0625, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.640128025605121, "frac_reward_zero_std": 0.0, "grad_norm": 3.2873400754187325, "kl": 0.01824951171875, "learning_rate": 4.110618978051317e-07, "loss": -0.008, "num_tokens": 139984098.0, "reward": 3.3527612686157227e-08, "reward_std": 1.0439988374710083, "rewards/wordcountpos_reward/mean": 3.3527612686157227e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.060434790907022876, "rewards/wordcountpos_reward/raw_geo/std": 0.0800297286678495, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1208.0, "completions/mean_length": 1155.75, "completions/mean_terminated_length": 949.2000122070312, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.6403280656131226, "frac_reward_zero_std": 0.0, "grad_norm": 2.488730931377832, "kl": 0.01214599609375, "learning_rate": 4.1076305496494377e-07, "loss": -0.062, "num_tokens": 140025022.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0434858798980713, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.014555227006298865, "rewards/wordcountpos_reward/raw_geo/std": 0.022970601544675673, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 1019.25, "completions/mean_terminated_length": 987.2000732421875, "completions/min_length": 730.0, "completions/min_terminated_length": 730.0, "epoch": 0.6405281056211243, "frac_reward_zero_std": 0.0, "grad_norm": 3.5616897999683066, "kl": 0.016082763671875, "learning_rate": 4.1046428001732225e-07, "loss": -0.01, "num_tokens": 140066562.0, "reward": 0.0, "reward_std": 0.9755232334136963, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0066597026003063515, "rewards/wordcountpos_reward/raw_geo/std": 0.06883212139977411, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13601470508735444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1033.0, "completions/mean_terminated_length": 1033.0, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.6407281456291258, "frac_reward_zero_std": 0.0, "grad_norm": 2.9541965228709754, "kl": 0.0166168212890625, "learning_rate": 4.1016557310795166e-07, "loss": 0.0104, "num_tokens": 140105442.0, "reward": 1.862645149230957e-08, "reward_std": 0.9060892462730408, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.011666125560527622, "rewards/wordcountpos_reward/raw_geo/std": 0.0655302089366038, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1103.8125, "completions/mean_terminated_length": 1103.8125, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.6409281856371274, "frac_reward_zero_std": 0.0, "grad_norm": 3.17476274407832, "kl": 0.0149383544921875, "learning_rate": 4.0986693438248244e-07, "loss": -0.0261, "num_tokens": 140154663.0, "reward": -1.4901161193847656e-08, "reward_std": 0.970923125743866, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1318848100491916, "rewards/wordcountpos_reward/raw_geo/std": 0.15583187342587856, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1230.625, "completions/mean_terminated_length": 1230.625, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.6411282256451291, "frac_reward_zero_std": 0.0, "grad_norm": 3.297889732940511, "kl": 0.018524169921875, "learning_rate": 4.0956836398653247e-07, "loss": -0.0459, "num_tokens": 140202777.0, "reward": 0.0, "reward_std": 0.6516681909561157, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.049424632249037065, "rewards/wordcountpos_reward/raw_geo/std": 0.10335408590417337, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1227.125, "completions/mean_terminated_length": 1164.1539306640625, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.6413282656531306, "frac_reward_zero_std": 0.0, "grad_norm": 3.125231231902985, "kl": 0.01739501953125, "learning_rate": 4.092698620656857e-07, "loss": -0.0097, "num_tokens": 140254379.0, "reward": 0.0, "reward_std": 0.8927292823791504, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09105567917888459, "rewards/wordcountpos_reward/raw_geo/std": 0.10615789013367138, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1165.4375, "completions/mean_terminated_length": 1165.4375, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.6415283056611323, "frac_reward_zero_std": 0.0, "grad_norm": 2.5988743981866245, "kl": 0.012542724609375, "learning_rate": 4.089714287654935e-07, "loss": -0.0191, "num_tokens": 140300946.0, "reward": 0.0, "reward_std": 0.8564648628234863, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03870981260857872, "rewards/wordcountpos_reward/raw_geo/std": 0.09677090531938162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619461, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1276.125, "completions/mean_terminated_length": 1174.3636474609375, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.6417283456691338, "frac_reward_zero_std": 0.0, "grad_norm": 2.6329267413438115, "kl": 0.011627197265625, "learning_rate": 4.0867306423147273e-07, "loss": 0.0146, "num_tokens": 140350660.0, "reward": 0.0, "reward_std": 1.0143091678619385, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06744301861286513, "rewards/wordcountpos_reward/raw_geo/std": 0.08787151745406302, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 774.0, "completions/max_terminated_length": 774.0, "completions/mean_length": 543.5, "completions/mean_terminated_length": 543.5, "completions/min_length": 385.0, "completions/min_terminated_length": 385.0, "epoch": 0.6419283856771354, "frac_reward_zero_std": 0.0, "grad_norm": 4.306762906775191, "kl": 0.013824462890625, "learning_rate": 4.083747686091079e-07, "loss": -0.0455, "num_tokens": 140372916.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0385055541992188, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04196117612783351, "rewards/wordcountpos_reward/raw_geo/std": 0.11278372856855644, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.17121569675358278, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1083.625, "completions/mean_terminated_length": 1083.625, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.6421284256851371, "frac_reward_zero_std": 0.0, "grad_norm": 2.7438271472049722, "kl": 0.01202392578125, "learning_rate": 4.0807654204384877e-07, "loss": -0.0387, "num_tokens": 140424302.0, "reward": 0.0, "reward_std": 0.9040760397911072, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.110967367666045, "rewards/wordcountpos_reward/raw_geo/std": 0.08236337702681112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1318.5, "completions/mean_terminated_length": 1209.5999755859375, "completions/min_length": 954.0, "completions/min_terminated_length": 954.0, "epoch": 0.6423284656931386, "frac_reward_zero_std": 0.0, "grad_norm": 3.0946431420915523, "kl": 0.0165557861328125, "learning_rate": 4.0777838468111235e-07, "loss": 0.0216, "num_tokens": 140472638.0, "reward": 2.2351741790771484e-08, "reward_std": 1.053022027015686, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.037906033039072855, "rewards/wordcountpos_reward/raw_geo/std": 0.046181430651077826, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1148.5625, "completions/mean_terminated_length": 1148.5625, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.6425285057011402, "frac_reward_zero_std": 0.0, "grad_norm": 3.520323676582336, "kl": 0.018798828125, "learning_rate": 4.0748029666628147e-07, "loss": -0.0054, "num_tokens": 140511655.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9359853267669678, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12694010412125636, "rewards/wordcountpos_reward/raw_geo/std": 0.18411333391947635, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1074.6875, "completions/mean_terminated_length": 1046.3333740234375, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.6427285457091418, "frac_reward_zero_std": 0.0, "grad_norm": 3.356981365978572, "kl": 0.017822265625, "learning_rate": 4.0718227814470475e-07, "loss": 0.0217, "num_tokens": 140555250.0, "reward": 2.9802322387695312e-08, "reward_std": 1.012027621269226, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.018701340122197287, "rewards/wordcountpos_reward/raw_geo/std": 0.050249376509870054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1096.25, "completions/mean_terminated_length": 1003.0769653320312, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.6429285857171434, "frac_reward_zero_std": 0.0, "grad_norm": 2.8495165494085666, "kl": 0.0148468017578125, "learning_rate": 4.068843292616981e-07, "loss": 0.082, "num_tokens": 140602646.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5461558699607849, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04602419934475629, "rewards/wordcountpos_reward/raw_geo/std": 0.22077569224876917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.15817243286527058, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1333.0, "completions/mean_length": 1063.4375, "completions/mean_terminated_length": 1034.3333740234375, "completions/min_length": 664.0, "completions/min_terminated_length": 664.0, "epoch": 0.6431286257251451, "frac_reward_zero_std": 0.0, "grad_norm": 3.316977865141545, "kl": 0.0157012939453125, "learning_rate": 4.065864501625421e-07, "loss": -0.0163, "num_tokens": 140637757.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9747430086135864, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0265140424019873, "rewards/wordcountpos_reward/raw_geo/std": 0.09973658257150461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1270024788326182, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1209.9375, "completions/mean_terminated_length": 1143.0, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.6433286657331466, "frac_reward_zero_std": 0.0, "grad_norm": 3.07265421365454, "kl": 0.017303466796875, "learning_rate": 4.062886409924844e-07, "loss": -0.0377, "num_tokens": 140688332.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6962119340896606, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09460237955577246, "rewards/wordcountpos_reward/raw_geo/std": 0.10094012224060539, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1045.1875, "completions/mean_terminated_length": 1045.1875, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.6435287057411482, "frac_reward_zero_std": 0.0, "grad_norm": 3.282551215739982, "kl": 0.0157012939453125, "learning_rate": 4.059909018967378e-07, "loss": -0.0158, "num_tokens": 140716855.0, "reward": 4.842877388000488e-08, "reward_std": 1.0102922916412354, "rewards/wordcountpos_reward/mean": 4.842877388000488e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10785507960683767, "rewards/wordcountpos_reward/raw_geo/std": 0.05746357932874369, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1033.5625, "completions/mean_terminated_length": 1033.5625, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.6437287457491498, "frac_reward_zero_std": 0.0, "grad_norm": 2.8493090096425644, "kl": 0.01396942138671875, "learning_rate": 4.056932330204814e-07, "loss": -0.0053, "num_tokens": 140751008.0, "reward": 0.0, "reward_std": 0.8604227304458618, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2205546462288369, "rewards/wordcountpos_reward/raw_geo/std": 0.3131256423462782, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.024343224778007377, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1112.375, "completions/mean_terminated_length": 1112.375, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.6439287857571514, "frac_reward_zero_std": 0.0, "grad_norm": 2.772887835531527, "kl": 0.01074981689453125, "learning_rate": 4.053956345088599e-07, "loss": -0.0152, "num_tokens": 140793414.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0382843017578125, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04925560953654923, "rewards/wordcountpos_reward/raw_geo/std": 0.08234598126956347, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1218.9375, "completions/mean_terminated_length": 1200.2000732421875, "completions/min_length": 953.0, "completions/min_terminated_length": 953.0, "epoch": 0.6441288257651531, "frac_reward_zero_std": 0.0, "grad_norm": 3.1948786683023083, "kl": 0.01837158203125, "learning_rate": 4.0509810650698374e-07, "loss": 0.0071, "num_tokens": 140836053.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8970828056335449, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015665051699761144, "rewards/wordcountpos_reward/raw_geo/std": 0.06175552255097221, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1200.875, "completions/mean_terminated_length": 1158.1429443359375, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.6443288657731546, "frac_reward_zero_std": 0.0, "grad_norm": 3.09587819528902, "kl": 0.0185546875, "learning_rate": 4.048006491599287e-07, "loss": 0.0198, "num_tokens": 140882851.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0634472370147705, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08710142287693046, "rewards/wordcountpos_reward/raw_geo/std": 0.13429546801338416, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1041.4375, "completions/mean_terminated_length": 1041.4375, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.6445289057811562, "frac_reward_zero_std": 0.0, "grad_norm": 3.3402869745519093, "kl": 0.0177154541015625, "learning_rate": 4.0450326261273657e-07, "loss": 0.0174, "num_tokens": 140933050.0, "reward": 0.0, "reward_std": 0.476829469203949, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1692090928567165, "rewards/wordcountpos_reward/raw_geo/std": 0.1399750595527291, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1046.75, "completions/mean_terminated_length": 1046.75, "completions/min_length": 803.0, "completions/min_terminated_length": 803.0, "epoch": 0.6447289457891578, "frac_reward_zero_std": 0.0, "grad_norm": 3.0006020092046737, "kl": 0.0124359130859375, "learning_rate": 4.042059470104141e-07, "loss": -0.022, "num_tokens": 140966094.0, "reward": 1.4901161193847656e-08, "reward_std": 1.009346604347229, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.060746706052316156, "rewards/wordcountpos_reward/raw_geo/std": 0.07519692385875787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1265.375, "completions/mean_terminated_length": 1231.857177734375, "completions/min_length": 1107.0, "completions/min_terminated_length": 1107.0, "epoch": 0.6449289857971594, "frac_reward_zero_std": 0.0, "grad_norm": 3.2312320959918366, "kl": 0.0171966552734375, "learning_rate": 4.0390870249793406e-07, "loss": -0.0343, "num_tokens": 141012164.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8127026557922363, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08576008639699934, "rewards/wordcountpos_reward/raw_geo/std": 0.1562245280254387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1236.0, "completions/mean_terminated_length": 1236.0, "completions/min_length": 1094.0, "completions/min_terminated_length": 1094.0, "epoch": 0.6451290258051611, "frac_reward_zero_std": 0.0, "grad_norm": 3.356492953746788, "kl": 0.0179443359375, "learning_rate": 4.03611529220234e-07, "loss": 0.0253, "num_tokens": 141052596.0, "reward": 0.0, "reward_std": 0.4755361080169678, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15969102807536234, "rewards/wordcountpos_reward/raw_geo/std": 0.15766948623750338, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1161.6875, "completions/mean_terminated_length": 1113.357177734375, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.6453290658131626, "frac_reward_zero_std": 0.0, "grad_norm": 3.047306036638341, "kl": 0.01300048828125, "learning_rate": 4.033144273222171e-07, "loss": -0.0502, "num_tokens": 141102551.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8055408000946045, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07965350989831121, "rewards/wordcountpos_reward/raw_geo/std": 0.11586940264012136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7375, "rewards/wordcountpos_reward/raw_rule/std": 0.17633091785785213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1260.1875, "completions/mean_terminated_length": 1204.84619140625, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.6455291058211642, "frac_reward_zero_std": 0.0, "grad_norm": 3.2005297211829133, "kl": 0.010528564453125, "learning_rate": 4.030173969487515e-07, "loss": 0.0182, "num_tokens": 141142586.0, "reward": 0.0, "reward_std": 0.6736308336257935, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07396747881102946, "rewards/wordcountpos_reward/raw_geo/std": 0.12892742142454877, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1213.5625, "completions/mean_terminated_length": 1041.7000732421875, "completions/min_length": 855.0, "completions/min_terminated_length": 855.0, "epoch": 0.6457291458291659, "frac_reward_zero_std": 0.0, "grad_norm": 2.7091116087993883, "kl": 0.01361083984375, "learning_rate": 4.027204382446706e-07, "loss": -0.0787, "num_tokens": 141195747.0, "reward": 0.0, "reward_std": 0.9003403186798096, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13387362038018813, "rewards/wordcountpos_reward/raw_geo/std": 0.14355871999334024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125757, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1359.9375, "completions/mean_terminated_length": 1275.9000244140625, "completions/min_length": 1186.0, "completions/min_terminated_length": 1186.0, "epoch": 0.6459291858371674, "frac_reward_zero_std": 0.0, "grad_norm": 2.4064175575813884, "kl": 0.01202392578125, "learning_rate": 4.024235513547727e-07, "loss": -0.0034, "num_tokens": 141246474.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6832606792449951, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03865709805494208, "rewards/wordcountpos_reward/raw_geo/std": 0.038654004320163166, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1169.9375, "completions/mean_terminated_length": 1122.7857666015625, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.6461292258451691, "frac_reward_zero_std": 0.0, "grad_norm": 2.8801570717734415, "kl": 0.0146026611328125, "learning_rate": 4.0212673642382144e-07, "loss": 0.0088, "num_tokens": 141291889.0, "reward": -2.60770320892334e-08, "reward_std": 1.0584347248077393, "rewards/wordcountpos_reward/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08823392835151815, "rewards/wordcountpos_reward/raw_geo/std": 0.05840454963257851, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1316.875, "completions/mean_terminated_length": 1174.4444580078125, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.6463292658531706, "frac_reward_zero_std": 0.0, "grad_norm": 3.3073709913946976, "kl": 0.020050048828125, "learning_rate": 4.018299935965447e-07, "loss": -0.014, "num_tokens": 141341151.0, "reward": 0.0, "reward_std": 0.9237385392189026, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.172727200347695, "rewards/wordcountpos_reward/raw_geo/std": 0.1296538204701773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869924, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1243.0, "completions/max_terminated_length": 1243.0, "completions/mean_length": 1066.4375, "completions/mean_terminated_length": 1066.4375, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.6465293058611722, "frac_reward_zero_std": 0.0, "grad_norm": 2.278518762165077, "kl": 0.0159149169921875, "learning_rate": 4.015333230176362e-07, "loss": 0.0092, "num_tokens": 141385382.0, "reward": -1.4901161193847656e-08, "reward_std": 1.012843370437622, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12375284575175853, "rewards/wordcountpos_reward/raw_geo/std": 0.12807281580235863, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1021.625, "completions/mean_terminated_length": 1021.625, "completions/min_length": 658.0, "completions/min_terminated_length": 658.0, "epoch": 0.6467293458691739, "frac_reward_zero_std": 0.0, "grad_norm": 3.052515017432756, "kl": 0.0208740234375, "learning_rate": 4.012367248317533e-07, "loss": -0.0152, "num_tokens": 141426368.0, "reward": 0.0, "reward_std": 0.9525411128997803, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.010562628134922427, "rewards/wordcountpos_reward/raw_geo/std": 0.04503647618066554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1210.1875, "completions/mean_terminated_length": 1190.86669921875, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.6469293858771754, "frac_reward_zero_std": 0.0, "grad_norm": 3.292883915445337, "kl": 0.020782470703125, "learning_rate": 4.009401991835192e-07, "loss": 0.0214, "num_tokens": 141469651.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0590753555297852, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08809454892059318, "rewards/wordcountpos_reward/raw_geo/std": 0.12068854929961129, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1249.5625, "completions/mean_terminated_length": 1099.300048828125, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.6471294258851771, "frac_reward_zero_std": 0.0, "grad_norm": 3.0301146888888724, "kl": 0.0146331787109375, "learning_rate": 4.006437462175205e-07, "loss": -0.075, "num_tokens": 141521396.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0493439435958862, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08438616298527925, "rewards/wordcountpos_reward/raw_geo/std": 0.03991182603717889, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1220.3125, "completions/mean_terminated_length": 1180.357177734375, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.6473294658931786, "frac_reward_zero_std": 0.0, "grad_norm": 2.484972372495161, "kl": 0.0121612548828125, "learning_rate": 4.0034736607830966e-07, "loss": -0.0059, "num_tokens": 141566601.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9469816088676453, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.018117590816089643, "rewards/wordcountpos_reward/raw_geo/std": 0.09295575897019391, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202952, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1466.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1156.5, "completions/mean_terminated_length": 1156.5, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.6475295059011802, "frac_reward_zero_std": 0.0, "grad_norm": 3.384028470505073, "kl": 0.016082763671875, "learning_rate": 4.0005105891040246e-07, "loss": -0.0131, "num_tokens": 141611881.0, "reward": 0.0, "reward_std": 0.9560538530349731, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08386333265625964, "rewards/wordcountpos_reward/raw_geo/std": 0.0532749124816171, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1385.4375, "completions/mean_terminated_length": 1296.3333740234375, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.6477295459091819, "frac_reward_zero_std": 0.0, "grad_norm": 3.128681481872886, "kl": 0.0170135498046875, "learning_rate": 3.9975482485828005e-07, "loss": -0.0029, "num_tokens": 141654504.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0082446336746216, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.003691027131630494, "rewards/wordcountpos_reward/raw_geo/std": 0.2675103471935962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1164283279771532, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1239.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 1005.875, "completions/mean_terminated_length": 1005.875, "completions/min_length": 695.0, "completions/min_terminated_length": 695.0, "epoch": 0.6479295859171834, "frac_reward_zero_std": 0.0, "grad_norm": 3.2762631511249385, "kl": 0.0162811279296875, "learning_rate": 3.994586640663872e-07, "loss": -0.0494, "num_tokens": 141688398.0, "reward": 2.9802322387695312e-08, "reward_std": 0.907970666885376, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1396990892744243, "rewards/wordcountpos_reward/raw_geo/std": 0.04092977578869187, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1210.25, "completions/mean_terminated_length": 1113.666748046875, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.6481296259251851, "frac_reward_zero_std": 0.0, "grad_norm": 2.748768070916777, "kl": 0.0156097412109375, "learning_rate": 3.9916257667913334e-07, "loss": 0.0049, "num_tokens": 141731762.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8600629568099976, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17630215697440602, "rewards/wordcountpos_reward/raw_geo/std": 0.1003321119402612, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1085254706406647, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1249.0, "completions/mean_terminated_length": 1191.0770263671875, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.6483296659331866, "frac_reward_zero_std": 0.0, "grad_norm": 2.805472525515038, "kl": 0.0152587890625, "learning_rate": 3.9886656284089206e-07, "loss": -0.0121, "num_tokens": 141777002.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0138096809387207, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01614687191175696, "rewards/wordcountpos_reward/raw_geo/std": 0.07968623405038047, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1270.875, "completions/mean_terminated_length": 1238.1429443359375, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.6485297059411882, "frac_reward_zero_std": 0.0, "grad_norm": 2.4969144783423585, "kl": 0.015380859375, "learning_rate": 3.9857062269600097e-07, "loss": 0.0183, "num_tokens": 141815928.0, "reward": 0.0, "reward_std": 0.6011440753936768, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.006607503668864804, "rewards/wordcountpos_reward/raw_geo/std": 0.050130520376776795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1132.5625, "completions/mean_terminated_length": 1132.5625, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.6487297459491899, "frac_reward_zero_std": 0.0, "grad_norm": 3.4762006838216806, "kl": 0.01922607421875, "learning_rate": 3.9827475638876184e-07, "loss": -0.0308, "num_tokens": 141860969.0, "reward": 0.0, "reward_std": 0.820988655090332, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22965117751749542, "rewards/wordcountpos_reward/raw_geo/std": 0.17763270220338492, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1347.5625, "completions/mean_terminated_length": 1278.272705078125, "completions/min_length": 1061.0, "completions/min_terminated_length": 1061.0, "epoch": 0.6489297859571914, "frac_reward_zero_std": 0.0, "grad_norm": 2.8330270577084766, "kl": 0.017669677734375, "learning_rate": 3.9797896406344057e-07, "loss": 0.0105, "num_tokens": 141909122.0, "reward": 0.0, "reward_std": 0.858787477016449, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.20651770891654694, "rewards/wordcountpos_reward/raw_geo/std": 0.2721795985306407, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1215.625, "completions/mean_terminated_length": 1150.0, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.6491298259651931, "frac_reward_zero_std": 0.0, "grad_norm": 3.4113201877418424, "kl": 0.021209716796875, "learning_rate": 3.976832458642666e-07, "loss": -0.0382, "num_tokens": 141957892.0, "reward": -1.4901161193847656e-08, "reward_std": 1.007545828819275, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17998331589959646, "rewards/wordcountpos_reward/raw_geo/std": 0.16040599462781355, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1382.375, "completions/mean_terminated_length": 1343.166748046875, "completions/min_length": 1154.0, "completions/min_terminated_length": 1154.0, "epoch": 0.6493298659731946, "frac_reward_zero_std": 0.0, "grad_norm": 2.5062640434798746, "kl": 0.013702392578125, "learning_rate": 3.9738760193543377e-07, "loss": 0.0085, "num_tokens": 142009322.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9738023281097412, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0638793983760139, "rewards/wordcountpos_reward/raw_geo/std": 0.0848261930094679, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1156.0625, "completions/mean_terminated_length": 1133.1334228515625, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.6495299059811962, "frac_reward_zero_std": 0.0, "grad_norm": 2.488238615047828, "kl": 0.011932373046875, "learning_rate": 3.970920324210991e-07, "loss": -0.0444, "num_tokens": 142052787.0, "reward": 0.0, "reward_std": 0.844889760017395, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12612374272280014, "rewards/wordcountpos_reward/raw_geo/std": 0.3017120592199765, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1278.0, "completions/mean_length": 1305.1875, "completions/mean_terminated_length": 1188.300048828125, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.6497299459891979, "frac_reward_zero_std": 0.0, "grad_norm": 2.5269893144282993, "kl": 0.0106048583984375, "learning_rate": 3.9679653746538377e-07, "loss": -0.0039, "num_tokens": 142095286.0, "reward": 0.0, "reward_std": 0.6478649377822876, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04623941762762561, "rewards/wordcountpos_reward/raw_geo/std": 0.1289854355453977, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1320.9375, "completions/mean_terminated_length": 1295.357177734375, "completions/min_length": 1154.0, "completions/min_terminated_length": 1154.0, "epoch": 0.6499299859971994, "frac_reward_zero_std": 0.0, "grad_norm": 2.9953020337723637, "kl": 0.0167694091796875, "learning_rate": 3.9650111721237245e-07, "loss": -0.0165, "num_tokens": 142131293.0, "reward": -5.960464477539063e-08, "reward_std": 0.42456620931625366, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.005151944369332756, "rewards/wordcountpos_reward/raw_geo/std": 0.014078680744382493, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1213.0, "completions/max_terminated_length": 1213.0, "completions/mean_length": 1011.5625, "completions/mean_terminated_length": 1011.5625, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.6501300260052011, "frac_reward_zero_std": 0.0, "grad_norm": 3.9417678261177596, "kl": 0.01995849609375, "learning_rate": 3.9620577180611356e-07, "loss": 0.0202, "num_tokens": 142171238.0, "reward": 2.2351741790771484e-08, "reward_std": 0.8626536130905151, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.006341361882357253, "rewards/wordcountpos_reward/raw_geo/std": 0.014197212929763128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1295.0, "completions/mean_length": 1207.3125, "completions/mean_terminated_length": 1139.769287109375, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.6503300660132026, "frac_reward_zero_std": 0.0, "grad_norm": 3.2284065061670106, "kl": 0.0157623291015625, "learning_rate": 3.959105013906184e-07, "loss": -0.0116, "num_tokens": 142223667.0, "reward": 0.0, "reward_std": 0.7118128538131714, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.5083298901327855, "rewards/wordcountpos_reward/raw_geo/std": 0.2900950109844135, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1086.25, "completions/mean_terminated_length": 1086.25, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.6505301060212042, "frac_reward_zero_std": 0.0, "grad_norm": 3.2912285753918646, "kl": 0.01788330078125, "learning_rate": 3.9561530610986283e-07, "loss": -0.027, "num_tokens": 142266935.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7163352966308594, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04221263463336578, "rewards/wordcountpos_reward/raw_geo/std": 0.1719220178166546, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1133.1875, "completions/mean_terminated_length": 1133.1875, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.6507301460292059, "frac_reward_zero_std": 0.0, "grad_norm": 3.0540964815168214, "kl": 0.017364501953125, "learning_rate": 3.9532018610778474e-07, "loss": -0.0182, "num_tokens": 142315634.0, "reward": -5.960464477539063e-08, "reward_std": 0.7266839742660522, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.045030274478619514, "rewards/wordcountpos_reward/raw_geo/std": 0.39073058146911865, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1346.0, "completions/max_terminated_length": 1346.0, "completions/mean_length": 1119.5625, "completions/mean_terminated_length": 1119.5625, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.6509301860372074, "frac_reward_zero_std": 0.0, "grad_norm": 2.5693906306842593, "kl": 0.0117340087890625, "learning_rate": 3.950251415282866e-07, "loss": -0.0057, "num_tokens": 142353571.0, "reward": 0.0, "reward_std": 0.8732448816299438, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11702331524933088, "rewards/wordcountpos_reward/raw_geo/std": 0.14609301526608398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1165.0, "completions/mean_length": 917.4375, "completions/mean_terminated_length": 878.6000366210938, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.6511302260452091, "frac_reward_zero_std": 0.0, "grad_norm": 2.987916632463737, "kl": 0.00991058349609375, "learning_rate": 3.9473017251523275e-07, "loss": 0.081, "num_tokens": 142384666.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8820925951004028, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1782188312772244, "rewards/wordcountpos_reward/raw_geo/std": 0.1154411580979546, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10671873729054748, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1136.3125, "completions/mean_terminated_length": 1136.3125, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.6513302660532106, "frac_reward_zero_std": 0.0, "grad_norm": 3.5884265700007503, "kl": 0.019378662109375, "learning_rate": 3.9443527921245233e-07, "loss": -0.0084, "num_tokens": 142433639.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8390098810195923, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0024459964955055556, "rewards/wordcountpos_reward/raw_geo/std": 0.08991711936586422, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1136.625, "completions/mean_terminated_length": 1136.625, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.6515303060612122, "frac_reward_zero_std": 0.0, "grad_norm": 3.102184074237592, "kl": 0.018707275390625, "learning_rate": 3.941404617637357e-07, "loss": 0.0113, "num_tokens": 142483289.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9151872396469116, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008124780363407334, "rewards/wordcountpos_reward/raw_geo/std": 0.12226381802403956, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1071.1875, "completions/mean_terminated_length": 1071.1875, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.6517303460692139, "frac_reward_zero_std": 0.0, "grad_norm": 3.46119165069364, "kl": 0.0195159912109375, "learning_rate": 3.9384572031283805e-07, "loss": -0.0038, "num_tokens": 142535116.0, "reward": 0.0, "reward_std": 0.4080125391483307, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0374818447814694, "rewards/wordcountpos_reward/raw_geo/std": 0.06321897164595457, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1156.0625, "completions/mean_terminated_length": 1156.0625, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.6519303860772154, "frac_reward_zero_std": 0.0, "grad_norm": 2.5337796516350335, "kl": 0.0117340087890625, "learning_rate": 3.9355105500347605e-07, "loss": 0.0033, "num_tokens": 142580253.0, "reward": 0.0, "reward_std": 0.9640440940856934, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02856508109184978, "rewards/wordcountpos_reward/raw_geo/std": 0.06984935714636728, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1216.625, "completions/mean_terminated_length": 1216.625, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.6521304260852171, "frac_reward_zero_std": 0.0, "grad_norm": 3.1558157558313593, "kl": 0.01605224609375, "learning_rate": 3.932564659793302e-07, "loss": 0.0127, "num_tokens": 142622071.0, "reward": 0.0, "reward_std": 0.8861334323883057, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.044979374427442355, "rewards/wordcountpos_reward/raw_geo/std": 0.07772707546557367, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1290.875, "completions/mean_terminated_length": 1242.615478515625, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.6523304660932187, "frac_reward_zero_std": 0.0, "grad_norm": 2.3669456207322965, "kl": 0.0101470947265625, "learning_rate": 3.9296195338404313e-07, "loss": 0.0152, "num_tokens": 142676333.0, "reward": 0.0, "reward_std": 0.9837334752082825, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025866522533862825, "rewards/wordcountpos_reward/raw_geo/std": 0.06673323832473795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1210.5625, "completions/mean_terminated_length": 1191.2667236328125, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.6525305061012202, "frac_reward_zero_std": 0.0, "grad_norm": 3.2162251402954616, "kl": 0.01654052734375, "learning_rate": 3.926675173612207e-07, "loss": -0.0109, "num_tokens": 142727398.0, "reward": 0.0, "reward_std": 0.5420777797698975, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01831778799280597, "rewards/wordcountpos_reward/raw_geo/std": 0.056619286970503314, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1156.8125, "completions/mean_terminated_length": 1133.933349609375, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.6527305461092219, "frac_reward_zero_std": 0.0, "grad_norm": 2.9708598027804975, "kl": 0.0148468017578125, "learning_rate": 3.9237315805443127e-07, "loss": -0.0249, "num_tokens": 142775507.0, "reward": -2.9802322387695312e-08, "reward_std": 0.45866233110427856, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17808346673332565, "rewards/wordcountpos_reward/raw_geo/std": 0.2337929502727528, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1198.6875, "completions/mean_terminated_length": 1198.6875, "completions/min_length": 1015.0, "completions/min_terminated_length": 1015.0, "epoch": 0.6529305861172234, "frac_reward_zero_std": 0.0, "grad_norm": 2.865354039407948, "kl": 0.015380859375, "learning_rate": 3.920788756072059e-07, "loss": 0.0087, "num_tokens": 142808294.0, "reward": 0.0, "reward_std": 0.6295730471611023, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0038384008316326833, "rewards/wordcountpos_reward/raw_geo/std": 0.3150166234565218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1178.75, "completions/mean_terminated_length": 1157.3333740234375, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.6531306261252251, "frac_reward_zero_std": 0.0, "grad_norm": 3.1803655186036375, "kl": 0.017333984375, "learning_rate": 3.917846701630376e-07, "loss": 0.018, "num_tokens": 142857218.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7726126909255981, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013802156559014225, "rewards/wordcountpos_reward/raw_geo/std": 0.18577057469267577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1314.4375, "completions/mean_terminated_length": 1203.0999755859375, "completions/min_length": 1036.0, "completions/min_terminated_length": 1036.0, "epoch": 0.6533306661332267, "frac_reward_zero_std": 0.0, "grad_norm": 2.750164080150984, "kl": 0.0121917724609375, "learning_rate": 3.914905418653828e-07, "loss": -0.0286, "num_tokens": 142900361.0, "reward": 2.9802322387695312e-08, "reward_std": 0.920957624912262, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.27200120205994743, "rewards/wordcountpos_reward/raw_geo/std": 0.15828478284191863, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1259.5625, "completions/mean_terminated_length": 1243.533447265625, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.6535307061412282, "frac_reward_zero_std": 0.0, "grad_norm": 2.2859919717091746, "kl": 0.01104736328125, "learning_rate": 3.911964908576595e-07, "loss": 0.0435, "num_tokens": 142954578.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9928600192070007, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17853888771704654, "rewards/wordcountpos_reward/raw_geo/std": 0.06939201225899982, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1149.0, "completions/max_terminated_length": 1149.0, "completions/mean_length": 932.125, "completions/mean_terminated_length": 932.125, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.6537307461492299, "frac_reward_zero_std": 0.0, "grad_norm": 2.928741176435277, "kl": 0.018890380859375, "learning_rate": 3.909025172832483e-07, "loss": -0.0078, "num_tokens": 142986156.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9316201210021973, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.046720260806214185, "rewards/wordcountpos_reward/raw_geo/std": 0.040117231040876826, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116195, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1276.25, "completions/mean_terminated_length": 1224.615478515625, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.6539307861572314, "frac_reward_zero_std": 0.0, "grad_norm": 2.9323566594751007, "kl": 0.0139007568359375, "learning_rate": 3.906086212854922e-07, "loss": -0.0351, "num_tokens": 143038896.0, "reward": 0.0, "reward_std": 0.9783488512039185, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.21816126542620276, "rewards/wordcountpos_reward/raw_geo/std": 0.07515163513249978, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1205.6875, "completions/mean_terminated_length": 1205.6875, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.654130826165233, "frac_reward_zero_std": 0.0, "grad_norm": 2.7005736944463528, "kl": 0.01495361328125, "learning_rate": 3.9031480300769615e-07, "loss": -0.0174, "num_tokens": 143083835.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7614495754241943, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.011069893220615693, "rewards/wordcountpos_reward/raw_geo/std": 0.06264767755086405, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1165.5, "completions/mean_terminated_length": 1165.5, "completions/min_length": 1044.0, "completions/min_terminated_length": 1044.0, "epoch": 0.6543308661732347, "frac_reward_zero_std": 0.0, "grad_norm": 2.4429013597552145, "kl": 0.00982666015625, "learning_rate": 3.900210625931273e-07, "loss": -0.0208, "num_tokens": 143130755.0, "reward": 0.0, "reward_std": 1.0161787271499634, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17847588326005978, "rewards/wordcountpos_reward/raw_geo/std": 0.06444240316419299, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1295.25, "completions/mean_terminated_length": 1266.0, "completions/min_length": 1124.0, "completions/min_terminated_length": 1124.0, "epoch": 0.6545309061812362, "frac_reward_zero_std": 0.0, "grad_norm": 2.2931865259748028, "kl": 0.009918212890625, "learning_rate": 3.897274001850146e-07, "loss": 0.0081, "num_tokens": 143170255.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8110247254371643, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08657345632313274, "rewards/wordcountpos_reward/raw_geo/std": 0.1316836689289331, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1194.6875, "completions/mean_terminated_length": 1092.916748046875, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.6547309461892379, "frac_reward_zero_std": 0.0, "grad_norm": 3.7151228666187683, "kl": 0.018280029296875, "learning_rate": 3.894338159265496e-07, "loss": -0.0083, "num_tokens": 143209250.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8745464086532593, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.038841513531855144, "rewards/wordcountpos_reward/raw_geo/std": 0.0692622824338826, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13743685418725538, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1401.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1079.75, "completions/mean_terminated_length": 1079.75, "completions/min_length": 721.0, "completions/min_terminated_length": 721.0, "epoch": 0.6549309861972394, "frac_reward_zero_std": 0.0, "grad_norm": 3.3870979364342753, "kl": 0.0146331787109375, "learning_rate": 3.891403099608848e-07, "loss": 0.0145, "num_tokens": 143249918.0, "reward": 2.9802322387695312e-08, "reward_std": 1.009496808052063, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07472071933772469, "rewards/wordcountpos_reward/raw_geo/std": 0.08921314303903397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1189.875, "completions/mean_terminated_length": 1169.2000732421875, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.655131026205241, "frac_reward_zero_std": 0.0, "grad_norm": 3.4120464990053776, "kl": 0.020782470703125, "learning_rate": 3.888468824311355e-07, "loss": 0.0453, "num_tokens": 143302684.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6535661816596985, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11130344796799026, "rewards/wordcountpos_reward/raw_geo/std": 0.14453191696306877, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06070572613176772, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1341.1875, "completions/mean_terminated_length": 1217.6666259765625, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.6553310662132427, "frac_reward_zero_std": 0.0, "grad_norm": 3.2519264408511193, "kl": 0.021270751953125, "learning_rate": 3.88553533480378e-07, "loss": -0.0099, "num_tokens": 143346695.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9480671882629395, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06097575351155474, "rewards/wordcountpos_reward/raw_geo/std": 0.04975724110375371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1183.4375, "completions/mean_terminated_length": 1162.3333740234375, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.6555311062212442, "frac_reward_zero_std": 0.0, "grad_norm": 3.082190579726607, "kl": 0.01922607421875, "learning_rate": 3.8826026325165075e-07, "loss": -0.0538, "num_tokens": 143386958.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0012234449386597, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05844881622082171, "rewards/wordcountpos_reward/raw_geo/std": 0.1172068922529898, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1268.375, "completions/mean_terminated_length": 1252.933349609375, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.6557311462292459, "frac_reward_zero_std": 0.0, "grad_norm": 3.0756245736132626, "kl": 0.0141143798828125, "learning_rate": 3.8796707188795355e-07, "loss": 0.0419, "num_tokens": 143422140.0, "reward": 0.0, "reward_std": 1.0096518993377686, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05199996950463938, "rewards/wordcountpos_reward/raw_geo/std": 0.04612286936987485, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965647, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1382.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1121.6875, "completions/mean_terminated_length": 1121.6875, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.6559311862372474, "frac_reward_zero_std": 0.0, "grad_norm": 3.135916460544545, "kl": 0.0171356201171875, "learning_rate": 3.87673959532248e-07, "loss": 0.006, "num_tokens": 143461759.0, "reward": 3.725290298461914e-08, "reward_std": 1.0354605913162231, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04647558593811834, "rewards/wordcountpos_reward/raw_geo/std": 0.10161609941098805, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 1090.0625, "completions/mean_terminated_length": 1090.0625, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.656131226245249, "frac_reward_zero_std": 0.0, "grad_norm": 2.730822616876137, "kl": 0.0174102783203125, "learning_rate": 3.873809263274567e-07, "loss": 0.0076, "num_tokens": 143507576.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0555397272109985, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07932512996037691, "rewards/wordcountpos_reward/raw_geo/std": 0.08880432090175668, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.029502040105226113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1327.375, "completions/mean_terminated_length": 1193.111083984375, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.6563312662532507, "frac_reward_zero_std": 0.0, "grad_norm": 2.912355478070788, "kl": 0.01983642578125, "learning_rate": 3.870879724164643e-07, "loss": 0.0458, "num_tokens": 143555934.0, "reward": 0.0, "reward_std": 0.31588608026504517, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0013656483110070904, "rewards/wordcountpos_reward/raw_geo/std": 0.25595663999950324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1075.25, "completions/mean_terminated_length": 1046.933349609375, "completions/min_length": 762.0, "completions/min_terminated_length": 762.0, "epoch": 0.6565313062612522, "frac_reward_zero_std": 0.0, "grad_norm": 2.7609992161645973, "kl": 0.01324462890625, "learning_rate": 3.8679509794211605e-07, "loss": -0.0118, "num_tokens": 143595850.0, "reward": 2.9802322387695312e-08, "reward_std": 0.48361945152282715, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.056976590899476945, "rewards/wordcountpos_reward/raw_geo/std": 0.060804800931644955, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1176.125, "completions/mean_terminated_length": 1154.533447265625, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.6567313462692539, "frac_reward_zero_std": 0.0, "grad_norm": 3.661427736030543, "kl": 0.021514892578125, "learning_rate": 3.8650230304721956e-07, "loss": 0.0233, "num_tokens": 143637084.0, "reward": 7.450580596923828e-09, "reward_std": 1.0614190101623535, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.14155640269279873, "rewards/wordcountpos_reward/raw_geo/std": 0.12653035320427153, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1157.8125, "completions/mean_terminated_length": 1108.9285888671875, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.6569313862772554, "frac_reward_zero_std": 0.0, "grad_norm": 3.170495459961004, "kl": 0.0161285400390625, "learning_rate": 3.8620958787454214e-07, "loss": -0.0376, "num_tokens": 143689153.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4617731273174286, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1085805714057971, "rewards/wordcountpos_reward/raw_geo/std": 0.20214228497280193, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1239.75, "completions/mean_terminated_length": 1239.75, "completions/min_length": 1143.0, "completions/min_terminated_length": 1143.0, "epoch": 0.657131426285257, "frac_reward_zero_std": 0.0, "grad_norm": 3.1668561863941953, "kl": 0.019317626953125, "learning_rate": 3.8591695256681365e-07, "loss": -0.0088, "num_tokens": 143739173.0, "reward": 0.0, "reward_std": 1.0085866451263428, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0890677978606673, "rewards/wordcountpos_reward/raw_geo/std": 0.13396344190811754, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1154.0, "completions/max_terminated_length": 1154.0, "completions/mean_length": 1047.5, "completions/mean_terminated_length": 1047.5, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.6573314662932587, "frac_reward_zero_std": 0.0, "grad_norm": 3.131685311794739, "kl": 0.014739990234375, "learning_rate": 3.856243972667241e-07, "loss": 0.0124, "num_tokens": 143775325.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7255717515945435, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01561561567084969, "rewards/wordcountpos_reward/raw_geo/std": 0.09998345179331163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1162.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 978.0625, "completions/mean_terminated_length": 978.0625, "completions/min_length": 663.0, "completions/min_terminated_length": 663.0, "epoch": 0.6575315063012602, "frac_reward_zero_std": 0.0, "grad_norm": 3.5433541572968354, "kl": 0.018707275390625, "learning_rate": 3.85331922116925e-07, "loss": 0.0137, "num_tokens": 143816614.0, "reward": 0.0, "reward_std": 0.5306374430656433, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1929783791821918, "rewards/wordcountpos_reward/raw_geo/std": 0.17116894437925545, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1180.9375, "completions/mean_terminated_length": 1180.9375, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.6577315463092619, "frac_reward_zero_std": 0.0, "grad_norm": 3.1209400555286404, "kl": 0.018768310546875, "learning_rate": 3.850395272600283e-07, "loss": -0.027, "num_tokens": 143850397.0, "reward": -2.9802322387695312e-08, "reward_std": 1.015316128730774, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020492124573296876, "rewards/wordcountpos_reward/raw_geo/std": 0.05087414953407257, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1157.25, "completions/mean_terminated_length": 1157.25, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.6579315863172635, "frac_reward_zero_std": 0.0, "grad_norm": 2.9354275754860533, "kl": 0.01617431640625, "learning_rate": 3.8474721283860714e-07, "loss": -0.0363, "num_tokens": 143893249.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0170046091079712, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01630932115575536, "rewards/wordcountpos_reward/raw_geo/std": 0.06986030324738193, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.12041594578792296, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1072.375, "completions/mean_terminated_length": 1072.375, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.658131626325265, "frac_reward_zero_std": 0.0, "grad_norm": 3.4826295004082715, "kl": 0.019287109375, "learning_rate": 3.844549789951954e-07, "loss": 0.0162, "num_tokens": 143938271.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9928193092346191, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11974317243605989, "rewards/wordcountpos_reward/raw_geo/std": 0.08747747694984785, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1163.625, "completions/mean_terminated_length": 1115.571533203125, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.6583316663332667, "frac_reward_zero_std": 0.0, "grad_norm": 2.8778362988479547, "kl": 0.0142669677734375, "learning_rate": 3.8416282587228776e-07, "loss": -0.0311, "num_tokens": 143992649.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8625040054321289, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08664716576604911, "rewards/wordcountpos_reward/raw_geo/std": 0.1964829787519205, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 1053.0, "completions/mean_terminated_length": 1053.0, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.6585317063412682, "frac_reward_zero_std": 0.0, "grad_norm": 3.593557149328327, "kl": 0.017822265625, "learning_rate": 3.838707536123391e-07, "loss": -0.0141, "num_tokens": 144030289.0, "reward": 2.2351741790771484e-08, "reward_std": 1.02969491481781, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03067319911054779, "rewards/wordcountpos_reward/raw_geo/std": 0.04362669395728335, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1203.5625, "completions/mean_terminated_length": 1203.5625, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.6587317463492699, "frac_reward_zero_std": 0.0, "grad_norm": 3.3023165062598303, "kl": 0.020751953125, "learning_rate": 3.8357876235776555e-07, "loss": 0.0256, "num_tokens": 144078890.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0288233757019043, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18256225860658276, "rewards/wordcountpos_reward/raw_geo/std": 0.20659470297421154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1227.5, "completions/mean_terminated_length": 1136.666748046875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.6589317863572715, "frac_reward_zero_std": 0.0, "grad_norm": 2.9171725287933326, "kl": 0.0117340087890625, "learning_rate": 3.8328685225094304e-07, "loss": -0.0387, "num_tokens": 144131410.0, "reward": -7.450580596923828e-09, "reward_std": 1.0422894954681396, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.011847269039981147, "rewards/wordcountpos_reward/raw_geo/std": 0.10332695605346108, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 976.25, "completions/mean_terminated_length": 976.25, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.659131826365273, "frac_reward_zero_std": 0.0, "grad_norm": 3.9933665344879654, "kl": 0.021270751953125, "learning_rate": 3.8299502343420844e-07, "loss": -0.0278, "num_tokens": 144175630.0, "reward": 0.0, "reward_std": 0.7138490080833435, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15052560197412088, "rewards/wordcountpos_reward/raw_geo/std": 0.052002673243756896, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1246.3125, "completions/mean_terminated_length": 1229.4000244140625, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.6593318663732747, "frac_reward_zero_std": 0.0, "grad_norm": 3.014505160403178, "kl": 0.021240234375, "learning_rate": 3.8270327604985877e-07, "loss": 0.0031, "num_tokens": 144221643.0, "reward": -2.9802322387695312e-08, "reward_std": 0.999814510345459, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05131992488988453, "rewards/wordcountpos_reward/raw_geo/std": 0.11354255739234262, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1199.0, "completions/mean_terminated_length": 1156.0, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.6595319063812762, "frac_reward_zero_std": 0.0, "grad_norm": 2.940288856417073, "kl": 0.0189056396484375, "learning_rate": 3.824116102401513e-07, "loss": -0.0081, "num_tokens": 144264715.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9277156591415405, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0012569472425599682, "rewards/wordcountpos_reward/raw_geo/std": 0.07280368367671539, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1095.25, "completions/mean_terminated_length": 1037.4285888671875, "completions/min_length": 505.0, "completions/min_terminated_length": 505.0, "epoch": 0.6597319463892779, "frac_reward_zero_std": 0.0, "grad_norm": 3.8703615953814388, "kl": 0.022979736328125, "learning_rate": 3.8212002614730377e-07, "loss": 0.0329, "num_tokens": 144305495.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9823676347732544, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014462055916942228, "rewards/wordcountpos_reward/raw_geo/std": 0.08675404069567551, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10610965676722954, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1198.3125, "completions/mean_terminated_length": 1155.21435546875, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.6599319863972795, "frac_reward_zero_std": 0.0, "grad_norm": 3.1428135583493284, "kl": 0.0174560546875, "learning_rate": 3.8182852391349387e-07, "loss": -0.0014, "num_tokens": 144351044.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8698796033859253, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015269219827975368, "rewards/wordcountpos_reward/raw_geo/std": 0.06973855234185371, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 885.4375, "completions/mean_terminated_length": 885.4375, "completions/min_length": 605.0, "completions/min_terminated_length": 605.0, "epoch": 0.660132026405281, "frac_reward_zero_std": 0.0, "grad_norm": 3.999847282111096, "kl": 0.02001953125, "learning_rate": 3.815371036808591e-07, "loss": -0.0039, "num_tokens": 144383579.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0038111209869385, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09283497586211155, "rewards/wordcountpos_reward/raw_geo/std": 0.08526411043736117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1206.5, "completions/mean_terminated_length": 1186.933349609375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.6603320664132827, "frac_reward_zero_std": 0.0, "grad_norm": 3.2178024062336243, "kl": 0.020263671875, "learning_rate": 3.812457655914979e-07, "loss": 0.0168, "num_tokens": 144423883.0, "reward": 0.0, "reward_std": 0.7387831211090088, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025919246635128887, "rewards/wordcountpos_reward/raw_geo/std": 0.06467467575487375, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282607, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1140.9375, "completions/mean_terminated_length": 1140.9375, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.6605321064212842, "frac_reward_zero_std": 0.0, "grad_norm": 3.2040988162948816, "kl": 0.018829345703125, "learning_rate": 3.8095450978746735e-07, "loss": 0.0139, "num_tokens": 144468682.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8542848825454712, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04992861991136116, "rewards/wordcountpos_reward/raw_geo/std": 0.08188122523664693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1288.5, "completions/mean_terminated_length": 1239.6923828125, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.6607321464292859, "frac_reward_zero_std": 0.0, "grad_norm": 3.4567411706432876, "kl": 0.022003173828125, "learning_rate": 3.8066333641078573e-07, "loss": -0.0323, "num_tokens": 144514946.0, "reward": 5.960464477539063e-08, "reward_std": 0.7113111615180969, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02737352862312872, "rewards/wordcountpos_reward/raw_geo/std": 0.09081341910071945, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0859586463881842, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1199.25, "completions/mean_terminated_length": 1156.2857666015625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.6609321864372875, "frac_reward_zero_std": 0.0, "grad_norm": 3.5089469163852476, "kl": 0.018463134765625, "learning_rate": 3.803722456034301e-07, "loss": 0.0189, "num_tokens": 144555654.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7680255770683289, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2766686401749069, "rewards/wordcountpos_reward/raw_geo/std": 0.43541928322180545, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 998.125, "completions/mean_terminated_length": 998.125, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.661132226445289, "frac_reward_zero_std": 0.0, "grad_norm": 3.7758205677338608, "kl": 0.019500732421875, "learning_rate": 3.800812375073379e-07, "loss": -0.004, "num_tokens": 144592104.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6134680509567261, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.048193257972926985, "rewards/wordcountpos_reward/raw_geo/std": 0.08441969951851801, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512345, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1139.25, "completions/mean_terminated_length": 1087.71435546875, "completions/min_length": 628.0, "completions/min_terminated_length": 628.0, "epoch": 0.6613322664532907, "frac_reward_zero_std": 0.0, "grad_norm": 2.616702585370658, "kl": 0.0091400146484375, "learning_rate": 3.7979031226440563e-07, "loss": -0.0392, "num_tokens": 144636580.0, "reward": -1.862645149230957e-08, "reward_std": 1.0612332820892334, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.009163377579541125, "rewards/wordcountpos_reward/raw_geo/std": 0.054084691127333634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1306.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 965.125, "completions/mean_terminated_length": 965.125, "completions/min_length": 725.0, "completions/min_terminated_length": 725.0, "epoch": 0.6615323064612922, "frac_reward_zero_std": 0.0, "grad_norm": 2.8014457832948274, "kl": 0.012359619140625, "learning_rate": 3.794994700164902e-07, "loss": -0.0328, "num_tokens": 144674806.0, "reward": -2.60770320892334e-08, "reward_std": 1.06345534324646, "rewards/wordcountpos_reward/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2891102508767018, "rewards/wordcountpos_reward/raw_geo/std": 0.09342192021105268, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 1196.125, "completions/mean_terminated_length": 1175.86669921875, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.6617323464692939, "frac_reward_zero_std": 0.0, "grad_norm": 2.692714948493963, "kl": 0.0128173828125, "learning_rate": 3.792087109054073e-07, "loss": -0.0008, "num_tokens": 144720152.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9458194375038147, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.046942675817700924, "rewards/wordcountpos_reward/raw_geo/std": 0.1109210735988391, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1094.5625, "completions/mean_terminated_length": 1094.5625, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.6619323864772955, "frac_reward_zero_std": 0.0, "grad_norm": 3.716138919150422, "kl": 0.018402099609375, "learning_rate": 3.7891803507293253e-07, "loss": -0.0685, "num_tokens": 144769609.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8659053444862366, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0023379538605896055, "rewards/wordcountpos_reward/raw_geo/std": 0.17812066535863905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 1079.8125, "completions/mean_terminated_length": 1079.8125, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.662132426485297, "frac_reward_zero_std": 0.0, "grad_norm": 3.206067416687675, "kl": 0.0167694091796875, "learning_rate": 3.786274426608007e-07, "loss": -0.0148, "num_tokens": 144814150.0, "reward": -2.9802322387695312e-08, "reward_std": 1.034212589263916, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05763017982068614, "rewards/wordcountpos_reward/raw_geo/std": 0.07536068692350287, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1026.0, "completions/mean_terminated_length": 1026.0, "completions/min_length": 748.0, "completions/min_terminated_length": 748.0, "epoch": 0.6623324664932987, "frac_reward_zero_std": 0.0, "grad_norm": 3.559096219772032, "kl": 0.020294189453125, "learning_rate": 3.7833693381070617e-07, "loss": -0.0249, "num_tokens": 144853382.0, "reward": -7.450580596923828e-09, "reward_std": 0.9378159046173096, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1264023958903083, "rewards/wordcountpos_reward/raw_geo/std": 0.24496200756596329, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1286.625, "completions/mean_terminated_length": 1256.1429443359375, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.6625325065013002, "frac_reward_zero_std": 0.0, "grad_norm": 3.277599453916886, "kl": 0.019683837890625, "learning_rate": 3.780465086643021e-07, "loss": 0.0118, "num_tokens": 144892624.0, "reward": 4.470348358154297e-08, "reward_std": 0.9330224990844727, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009929405919668328, "rewards/wordcountpos_reward/raw_geo/std": 0.07924899705616882, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 947.125, "completions/mean_terminated_length": 947.125, "completions/min_length": 535.0, "completions/min_terminated_length": 535.0, "epoch": 0.6627325465093019, "frac_reward_zero_std": 0.0, "grad_norm": 3.7315929308314093, "kl": 0.023040771484375, "learning_rate": 3.7775616736320125e-07, "loss": -0.036, "num_tokens": 144923290.0, "reward": 2.2351741790771484e-08, "reward_std": 1.017561912536621, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02294995362132145, "rewards/wordcountpos_reward/raw_geo/std": 0.13359191113558913, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101761, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1293.0, "completions/mean_length": 1098.8125, "completions/mean_terminated_length": 1072.0667724609375, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.6629325865173035, "frac_reward_zero_std": 0.0, "grad_norm": 2.51402809050669, "kl": 0.0133819580078125, "learning_rate": 3.7746591004897533e-07, "loss": -0.0543, "num_tokens": 144972831.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8756188154220581, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10417075170899441, "rewards/wordcountpos_reward/raw_geo/std": 0.08405490628398483, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.107496769977314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 1065.3125, "completions/mean_terminated_length": 1065.3125, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.663132626525305, "frac_reward_zero_std": 0.0, "grad_norm": 3.5390675610390496, "kl": 0.0194091796875, "learning_rate": 3.771757368631552e-07, "loss": 0.012, "num_tokens": 145013852.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0345865488052368, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1392738611575359, "rewards/wordcountpos_reward/raw_geo/std": 0.07376799554635981, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 1037.9375, "completions/mean_terminated_length": 1037.9375, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.6633326665333067, "frac_reward_zero_std": 0.0, "grad_norm": 3.4979056638049646, "kl": 0.020721435546875, "learning_rate": 3.7688564794723045e-07, "loss": -0.021, "num_tokens": 145048467.0, "reward": 5.960464477539063e-08, "reward_std": 0.6812518835067749, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17144726148497308, "rewards/wordcountpos_reward/raw_geo/std": 0.0708749264594736, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333331, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1193.75, "completions/mean_terminated_length": 1173.3333740234375, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.6635327065413082, "frac_reward_zero_std": 0.0, "grad_norm": 3.0265188792671007, "kl": 0.01241302490234375, "learning_rate": 3.7659564344264994e-07, "loss": -0.053, "num_tokens": 145099791.0, "reward": 7.450580596923828e-09, "reward_std": 1.0335580110549927, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1424612103793468, "rewards/wordcountpos_reward/raw_geo/std": 0.21057556937129052, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.13743685418725538, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1308.875, "completions/mean_terminated_length": 1308.875, "completions/min_length": 1065.0, "completions/min_terminated_length": 1065.0, "epoch": 0.6637327465493099, "frac_reward_zero_std": 0.0, "grad_norm": 2.2983252830231344, "kl": 0.01080322265625, "learning_rate": 3.7630572349082103e-07, "loss": -0.0208, "num_tokens": 145147469.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8526941537857056, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1500546882223249, "rewards/wordcountpos_reward/raw_geo/std": 0.07924964449270579, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1143.125, "completions/mean_terminated_length": 1119.3333740234375, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.6639327865573115, "frac_reward_zero_std": 0.0, "grad_norm": 3.47737678230712, "kl": 0.020751953125, "learning_rate": 3.7601588823311015e-07, "loss": 0.0203, "num_tokens": 145199335.0, "reward": 0.0, "reward_std": 0.6161817312240601, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1012587015211332, "rewards/wordcountpos_reward/raw_geo/std": 0.11644186239959226, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1190.125, "completions/mean_terminated_length": 1086.8333740234375, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.664132826565313, "frac_reward_zero_std": 0.0, "grad_norm": 2.981856250629352, "kl": 0.023040771484375, "learning_rate": 3.7572613781084215e-07, "loss": -0.0051, "num_tokens": 145242785.0, "reward": 0.0, "reward_std": 0.7734684944152832, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15078023971955312, "rewards/wordcountpos_reward/raw_geo/std": 0.19191429084368136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1115.6875, "completions/mean_terminated_length": 1115.6875, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.6643328665733147, "frac_reward_zero_std": 0.0, "grad_norm": 2.7228252152982586, "kl": 0.01544189453125, "learning_rate": 3.7543647236530084e-07, "loss": 0.007, "num_tokens": 145280588.0, "reward": 0.0, "reward_std": 0.8408713340759277, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05222630098449565, "rewards/wordcountpos_reward/raw_geo/std": 0.10033198294066971, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10671873729054748, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1216.375, "completions/mean_terminated_length": 1121.8333740234375, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.6645329065813163, "frac_reward_zero_std": 0.0, "grad_norm": 2.7102734295000768, "kl": 0.01180267333984375, "learning_rate": 3.751468920377282e-07, "loss": 0.0252, "num_tokens": 145332618.0, "reward": 7.450580596923828e-09, "reward_std": 0.9762457013130188, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10676397062323048, "rewards/wordcountpos_reward/raw_geo/std": 0.09915497870495782, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1273.375, "completions/mean_terminated_length": 1241.0, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.6647329465893179, "frac_reward_zero_std": 0.0, "grad_norm": 3.23968725524877, "kl": 0.018157958984375, "learning_rate": 3.7485739696932496e-07, "loss": 0.0289, "num_tokens": 145378704.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9776238203048706, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09597951698757828, "rewards/wordcountpos_reward/raw_geo/std": 0.19158513064588975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1200.8125, "completions/mean_terminated_length": 1180.86669921875, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.6649329865973195, "frac_reward_zero_std": 0.0, "grad_norm": 3.235570630598221, "kl": 0.01934814453125, "learning_rate": 3.745679873012503e-07, "loss": -0.0024, "num_tokens": 145419013.0, "reward": -2.2351741790771484e-08, "reward_std": 1.00696861743927, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20794163947786737, "rewards/wordcountpos_reward/raw_geo/std": 0.18009450679438951, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.12412657816683505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1103.625, "completions/mean_terminated_length": 1103.625, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.665133026605321, "frac_reward_zero_std": 0.0, "grad_norm": 3.434200362726161, "kl": 0.02044677734375, "learning_rate": 3.742786631746215e-07, "loss": -0.039, "num_tokens": 145472063.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0495176315307617, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06798303990212821, "rewards/wordcountpos_reward/raw_geo/std": 0.08559276047065828, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185827, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1223.0625, "completions/mean_terminated_length": 1204.60009765625, "completions/min_length": 1039.0, "completions/min_terminated_length": 1039.0, "epoch": 0.6653330666133227, "frac_reward_zero_std": 0.0, "grad_norm": 3.1688071868907333, "kl": 0.019439697265625, "learning_rate": 3.739894247305146e-07, "loss": -0.0143, "num_tokens": 145516328.0, "reward": 0.0, "reward_std": 0.9730132818222046, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.047442265385555446, "rewards/wordcountpos_reward/raw_geo/std": 0.0503299980032755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1175.0, "completions/mean_length": 1258.625, "completions/mean_terminated_length": 1017.25, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.6655331066213243, "frac_reward_zero_std": 0.0, "grad_norm": 2.388151533131199, "kl": 0.0124053955078125, "learning_rate": 3.7370027210996335e-07, "loss": 0.027, "num_tokens": 145553522.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9865334630012512, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004557452361531333, "rewards/wordcountpos_reward/raw_geo/std": 0.0625773876789323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845024, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1120.8125, "completions/mean_terminated_length": 1120.8125, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.6657331466293258, "frac_reward_zero_std": 0.0, "grad_norm": 3.2389690374935416, "kl": 0.018707275390625, "learning_rate": 3.734112054539602e-07, "loss": -0.0081, "num_tokens": 145595903.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8433341979980469, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08732008190708139, "rewards/wordcountpos_reward/raw_geo/std": 0.09196484188844986, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1037.0, "completions/mean_terminated_length": 1037.0, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.6659331866373275, "frac_reward_zero_std": 0.0, "grad_norm": 3.0141523078772616, "kl": 0.0152740478515625, "learning_rate": 3.73122224903455e-07, "loss": -0.0071, "num_tokens": 145625983.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7914624214172363, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01920348754502259, "rewards/wordcountpos_reward/raw_geo/std": 0.16122026435857298, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1223.0, "completions/max_terminated_length": 1223.0, "completions/mean_length": 1010.3125, "completions/mean_terminated_length": 1010.3125, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.666133226645329, "frac_reward_zero_std": 0.0, "grad_norm": 3.348038276482331, "kl": 0.016448974609375, "learning_rate": 3.7283333059935617e-07, "loss": 0.005, "num_tokens": 145667060.0, "reward": 2.9802322387695312e-08, "reward_std": 1.008789300918579, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013887287930967605, "rewards/wordcountpos_reward/raw_geo/std": 0.07926277824883189, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1259.0625, "completions/mean_terminated_length": 1178.75, "completions/min_length": 1064.0, "completions/min_terminated_length": 1064.0, "epoch": 0.6663332666533307, "frac_reward_zero_std": 0.0, "grad_norm": 2.6779128459695447, "kl": 0.017303466796875, "learning_rate": 3.725445226825297e-07, "loss": -0.0028, "num_tokens": 145703021.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6090520620346069, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04118142666419033, "rewards/wordcountpos_reward/raw_geo/std": 0.12937436449455195, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1379.375, "completions/mean_terminated_length": 1339.166748046875, "completions/min_length": 1198.0, "completions/min_terminated_length": 1198.0, "epoch": 0.6665333066613323, "frac_reward_zero_std": 0.0, "grad_norm": 3.0491967199917935, "kl": 0.017578125, "learning_rate": 3.722558012938001e-07, "loss": -0.0099, "num_tokens": 145757195.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9603745937347412, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09856390553701008, "rewards/wordcountpos_reward/raw_geo/std": 0.050462862159458145, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1013.25, "completions/mean_terminated_length": 900.923095703125, "completions/min_length": 538.0, "completions/min_terminated_length": 538.0, "epoch": 0.6667333466693338, "frac_reward_zero_std": 0.0, "grad_norm": 3.0535677205395815, "kl": 0.018157958984375, "learning_rate": 3.719671665739484e-07, "loss": 0.0638, "num_tokens": 145799711.0, "reward": 0.0, "reward_std": 0.9794710874557495, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17611732532145646, "rewards/wordcountpos_reward/raw_geo/std": 0.2699685601816102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.094182643679026, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1249.5, "completions/mean_terminated_length": 1232.800048828125, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.6669333866773355, "frac_reward_zero_std": 0.0, "grad_norm": 2.9773001154969885, "kl": 0.01861572265625, "learning_rate": 3.716786186637152e-07, "loss": -0.0398, "num_tokens": 145854887.0, "reward": 0.0, "reward_std": 0.8989537954330444, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0015755176585117096, "rewards/wordcountpos_reward/raw_geo/std": 0.07624490423787907, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1235.5, "completions/mean_terminated_length": 1235.5, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.667133426685337, "frac_reward_zero_std": 0.0, "grad_norm": 3.0269032467283514, "kl": 0.015106201171875, "learning_rate": 3.71390157703797e-07, "loss": -0.0004, "num_tokens": 145901735.0, "reward": 0.0, "reward_std": 0.8659965991973877, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15253303147789993, "rewards/wordcountpos_reward/raw_geo/std": 0.10808495127609283, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1073.75, "completions/mean_terminated_length": 1045.3333740234375, "completions/min_length": 610.0, "completions/min_terminated_length": 610.0, "epoch": 0.6673334666933387, "frac_reward_zero_std": 0.0, "grad_norm": 3.3288395947328824, "kl": 0.01568603515625, "learning_rate": 3.711017838348491e-07, "loss": -0.0205, "num_tokens": 145948571.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0621402263641357, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0055462512365661056, "rewards/wordcountpos_reward/raw_geo/std": 0.07117314423393445, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15533714826025882, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1355.5, "completions/mean_terminated_length": 1268.800048828125, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.6675335067013403, "frac_reward_zero_std": 0.0, "grad_norm": 2.77938331621578, "kl": 0.017242431640625, "learning_rate": 3.7081349719748367e-07, "loss": -0.0359, "num_tokens": 145993923.0, "reward": 0.0, "reward_std": 0.6943628191947937, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1575556273587909, "rewards/wordcountpos_reward/raw_geo/std": 0.08985990265392352, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 900.5625, "completions/mean_terminated_length": 900.5625, "completions/min_length": 569.0, "completions/min_terminated_length": 569.0, "epoch": 0.6677335467093418, "frac_reward_zero_std": 0.0, "grad_norm": 3.0021118781696985, "kl": 0.015411376953125, "learning_rate": 3.7052529793227073e-07, "loss": -0.0162, "num_tokens": 146031084.0, "reward": 0.0, "reward_std": 0.8033987283706665, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04632383929219882, "rewards/wordcountpos_reward/raw_geo/std": 0.21600842479487722, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414601, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 971.0, "completions/mean_terminated_length": 971.0, "completions/min_length": 649.0, "completions/min_terminated_length": 649.0, "epoch": 0.6679335867173435, "frac_reward_zero_std": 0.0, "grad_norm": 4.0440740782194515, "kl": 0.021240234375, "learning_rate": 3.7023718617973745e-07, "loss": -0.0254, "num_tokens": 146072436.0, "reward": 0.0, "reward_std": 0.370272696018219, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09817053713382626, "rewards/wordcountpos_reward/raw_geo/std": 0.10849490458860554, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.15752718754175363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1289.8125, "completions/mean_terminated_length": 1194.272705078125, "completions/min_length": 1077.0, "completions/min_terminated_length": 1077.0, "epoch": 0.668133626725345, "frac_reward_zero_std": 0.0, "grad_norm": 2.263066680067384, "kl": 0.0107879638671875, "learning_rate": 3.6994916208036853e-07, "loss": -0.0196, "num_tokens": 146127289.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9866659641265869, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.014429250673795989, "rewards/wordcountpos_reward/raw_geo/std": 0.09120151497796306, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1375.1875, "completions/mean_terminated_length": 1250.375, "completions/min_length": 1115.0, "completions/min_terminated_length": 1115.0, "epoch": 0.6683336667333467, "frac_reward_zero_std": 0.0, "grad_norm": 2.0267590965428837, "kl": 0.013214111328125, "learning_rate": 3.696612257746057e-07, "loss": -0.0332, "num_tokens": 146184484.0, "reward": 0.0, "reward_std": 0.9050445556640625, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10458226483120857, "rewards/wordcountpos_reward/raw_geo/std": 0.12128734107944333, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13437096247164249, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1095.9375, "completions/mean_terminated_length": 1095.9375, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.6685337067413483, "frac_reward_zero_std": 0.0, "grad_norm": 3.449453932317137, "kl": 0.01812744140625, "learning_rate": 3.6937337740284813e-07, "loss": 0.0138, "num_tokens": 146222491.0, "reward": 2.421438694000244e-08, "reward_std": 1.0182061195373535, "rewards/wordcountpos_reward/mean": 2.421438694000244e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09772594187243339, "rewards/wordcountpos_reward/raw_geo/std": 0.0755703975995813, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1288.25, "completions/mean_terminated_length": 1258.0, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.6687337467493498, "frac_reward_zero_std": 0.0, "grad_norm": 2.613938761337795, "kl": 0.013519287109375, "learning_rate": 3.690856171054518e-07, "loss": -0.0379, "num_tokens": 146278087.0, "reward": 0.0, "reward_std": 0.5619757771492004, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14228158151487702, "rewards/wordcountpos_reward/raw_geo/std": 0.1014453492973835, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1275.625, "completions/mean_terminated_length": 1243.571533203125, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.6689337867573515, "frac_reward_zero_std": 0.0, "grad_norm": 3.1199056597360073, "kl": 0.018096923828125, "learning_rate": 3.687979450227303e-07, "loss": -0.0121, "num_tokens": 146332689.0, "reward": 0.0, "reward_std": 0.42847785353660583, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2073703681927702, "rewards/wordcountpos_reward/raw_geo/std": 0.09554911761670902, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1230.8125, "completions/mean_terminated_length": 1192.357177734375, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.669133826765353, "frac_reward_zero_std": 0.0, "grad_norm": 3.026907723243432, "kl": 0.0141143798828125, "learning_rate": 3.685103612949535e-07, "loss": -0.0614, "num_tokens": 146383486.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8412420749664307, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03535969661912156, "rewards/wordcountpos_reward/raw_geo/std": 0.0934752373586306, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16594287281181147, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1174.0, "completions/max_terminated_length": 1174.0, "completions/mean_length": 1016.875, "completions/mean_terminated_length": 1016.875, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.6693338667733547, "frac_reward_zero_std": 0.0, "grad_norm": 3.815364183647527, "kl": 0.018585205078125, "learning_rate": 3.682228660623489e-07, "loss": -0.0053, "num_tokens": 146423044.0, "reward": 0.0, "reward_std": 0.8502327799797058, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04177317892329413, "rewards/wordcountpos_reward/raw_geo/std": 0.19522493140725874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1371.625, "completions/mean_terminated_length": 1313.2728271484375, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.6695339067813563, "frac_reward_zero_std": 0.0, "grad_norm": 2.5916622641236255, "kl": 0.0144195556640625, "learning_rate": 3.6793545946509996e-07, "loss": -0.0171, "num_tokens": 146477078.0, "reward": -1.4901161193847656e-08, "reward_std": 1.014261245727539, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016534739696856048, "rewards/wordcountpos_reward/raw_geo/std": 0.05480286489373899, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1158.0, "completions/max_terminated_length": 1158.0, "completions/mean_length": 1010.5, "completions/mean_terminated_length": 1010.5, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.6697339467893578, "frac_reward_zero_std": 0.0, "grad_norm": 3.644660357248143, "kl": 0.019012451171875, "learning_rate": 3.676481416433478e-07, "loss": -0.0293, "num_tokens": 146516486.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8712624907493591, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.040356014084770866, "rewards/wordcountpos_reward/raw_geo/std": 0.05495003261555892, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1083.5625, "completions/mean_terminated_length": 987.4615478515625, "completions/min_length": 622.0, "completions/min_terminated_length": 622.0, "epoch": 0.6699339867973595, "frac_reward_zero_std": 0.0, "grad_norm": 3.203915622138906, "kl": 0.01495361328125, "learning_rate": 3.673609127371897e-07, "loss": 0.0062, "num_tokens": 146569439.0, "reward": 0.0, "reward_std": 1.0015510320663452, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04783083639484891, "rewards/wordcountpos_reward/raw_geo/std": 0.08365212198473514, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1164.25, "completions/mean_terminated_length": 1116.2857666015625, "completions/min_length": 703.0, "completions/min_terminated_length": 703.0, "epoch": 0.670134026805361, "frac_reward_zero_std": 0.0, "grad_norm": 3.081881842671262, "kl": 0.019012451171875, "learning_rate": 3.6707377288668017e-07, "loss": 0.0459, "num_tokens": 146616155.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9640661478042603, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05346288707871314, "rewards/wordcountpos_reward/raw_geo/std": 0.0752853394212812, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1211.5, "completions/mean_terminated_length": 1170.2857666015625, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.6703340668133627, "frac_reward_zero_std": 0.0, "grad_norm": 3.288755328891892, "kl": 0.018798828125, "learning_rate": 3.6678672223182936e-07, "loss": 0.0301, "num_tokens": 146662043.0, "reward": 0.0, "reward_std": 0.8841336369514465, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13522596215988184, "rewards/wordcountpos_reward/raw_geo/std": 0.14389887543293056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1028.875, "completions/mean_terminated_length": 997.4667358398438, "completions/min_length": 783.0, "completions/min_terminated_length": 783.0, "epoch": 0.6705341068213643, "frac_reward_zero_std": 0.0, "grad_norm": 3.440652572971669, "kl": 0.0178375244140625, "learning_rate": 3.6649976091260503e-07, "loss": 0.0032, "num_tokens": 146702281.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0490752458572388, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.023032007871991174, "rewards/wordcountpos_reward/raw_geo/std": 0.0657429018851577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1401.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1173.375, "completions/mean_terminated_length": 1173.375, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.6707341468293658, "frac_reward_zero_std": 0.0, "grad_norm": 1.9248429087141499, "kl": 0.0088043212890625, "learning_rate": 3.662128890689302e-07, "loss": -0.004, "num_tokens": 146748439.0, "reward": 5.960464477539063e-08, "reward_std": 0.704389750957489, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06643219414219527, "rewards/wordcountpos_reward/raw_geo/std": 0.11632721821814898, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1327.4375, "completions/mean_terminated_length": 1302.7857666015625, "completions/min_length": 1055.0, "completions/min_terminated_length": 1055.0, "epoch": 0.6709341868373675, "frac_reward_zero_std": 0.0, "grad_norm": 2.5773009792312918, "kl": 0.0146636962890625, "learning_rate": 3.659261068406855e-07, "loss": -0.0027, "num_tokens": 146797366.0, "reward": 0.0, "reward_std": 0.5590336322784424, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09118622938309393, "rewards/wordcountpos_reward/raw_geo/std": 0.12798280884617183, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1372.1875, "completions/mean_terminated_length": 1272.77783203125, "completions/min_length": 1129.0, "completions/min_terminated_length": 1129.0, "epoch": 0.671134226845369, "frac_reward_zero_std": 0.0, "grad_norm": 2.703103097340223, "kl": 0.0173492431640625, "learning_rate": 3.656394143677068e-07, "loss": -0.0075, "num_tokens": 146845545.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9062733054161072, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09588389913710764, "rewards/wordcountpos_reward/raw_geo/std": 0.19907196910673755, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1223.5, "completions/mean_terminated_length": 1223.5, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.6713342668533707, "frac_reward_zero_std": 0.0, "grad_norm": 2.7591332296815767, "kl": 0.015869140625, "learning_rate": 3.653528117897868e-07, "loss": -0.0255, "num_tokens": 146879993.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0321934223175049, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10663468889987533, "rewards/wordcountpos_reward/raw_geo/std": 0.13580817953599397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1267.3125, "completions/mean_terminated_length": 1213.615478515625, "completions/min_length": 1049.0, "completions/min_terminated_length": 1049.0, "epoch": 0.6715343068613723, "frac_reward_zero_std": 0.0, "grad_norm": 3.3257942696990543, "kl": 0.0172119140625, "learning_rate": 3.650662992466743e-07, "loss": -0.0265, "num_tokens": 146925734.0, "reward": 0.0, "reward_std": 0.9918062686920166, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011082358489008338, "rewards/wordcountpos_reward/raw_geo/std": 0.16013198247626467, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1131.6875, "completions/mean_terminated_length": 1131.6875, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.6717343468693738, "frac_reward_zero_std": 0.0, "grad_norm": 3.176928823712167, "kl": 0.0164947509765625, "learning_rate": 3.647798768780742e-07, "loss": -0.0565, "num_tokens": 146983769.0, "reward": 0.0, "reward_std": 0.931851863861084, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.026168485681619162, "rewards/wordcountpos_reward/raw_geo/std": 0.11945225449996111, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1046.5625, "completions/mean_terminated_length": 1046.5625, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.6719343868773755, "frac_reward_zero_std": 0.0, "grad_norm": 3.8697602051347144, "kl": 0.0216064453125, "learning_rate": 3.6449354482364705e-07, "loss": -0.0473, "num_tokens": 147032522.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0426734685897827, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21895105082236638, "rewards/wordcountpos_reward/raw_geo/std": 0.17620613521198925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15396007178390023, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1055.8125, "completions/mean_terminated_length": 1026.2000732421875, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.6721344268853771, "frac_reward_zero_std": 0.0, "grad_norm": 3.580595772809599, "kl": 0.02099609375, "learning_rate": 3.6420730322300987e-07, "loss": -0.007, "num_tokens": 147077527.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8755192756652832, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10207680029079158, "rewards/wordcountpos_reward/raw_geo/std": 0.16828110998849705, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 1025.125, "completions/mean_terminated_length": 1025.125, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.6723344668933787, "frac_reward_zero_std": 0.0, "grad_norm": 3.438085467373346, "kl": 0.017791748046875, "learning_rate": 3.6392115221573515e-07, "loss": -0.0188, "num_tokens": 147120017.0, "reward": -2.9802322387695312e-08, "reward_std": 0.775924563407898, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.024107637017544065, "rewards/wordcountpos_reward/raw_geo/std": 0.11100291927808571, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1334.0625, "completions/mean_terminated_length": 1278.75, "completions/min_length": 1003.0, "completions/min_terminated_length": 1003.0, "epoch": 0.6725345069013803, "frac_reward_zero_std": 0.0, "grad_norm": 3.3395047156256257, "kl": 0.01934814453125, "learning_rate": 3.636350919413518e-07, "loss": 0.0149, "num_tokens": 147174026.0, "reward": 7.450580596923828e-09, "reward_std": 1.0041682720184326, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.03918294895167247, "rewards/wordcountpos_reward/raw_geo/std": 0.13439682123471508, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1226.875, "completions/mean_terminated_length": 1135.8333740234375, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.6727345469093818, "frac_reward_zero_std": 0.0, "grad_norm": 2.9459385841217025, "kl": 0.021820068359375, "learning_rate": 3.633491225393437e-07, "loss": -0.0366, "num_tokens": 147217816.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8612076044082642, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.016224602713494945, "rewards/wordcountpos_reward/raw_geo/std": 0.33321022243267195, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 909.75, "completions/mean_terminated_length": 909.75, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.6729345869173835, "frac_reward_zero_std": 0.0, "grad_norm": 3.4891606831563973, "kl": 0.01666259765625, "learning_rate": 3.630632441491511e-07, "loss": -0.0011, "num_tokens": 147256924.0, "reward": 2.9802322387695312e-08, "reward_std": 0.907329797744751, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2363060423542781, "rewards/wordcountpos_reward/raw_geo/std": 0.12317468174484035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222522, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1195.8125, "completions/mean_terminated_length": 1175.533447265625, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.6731346269253851, "frac_reward_zero_std": 0.0, "grad_norm": 3.1720827346194214, "kl": 0.018341064453125, "learning_rate": 3.6277745691016947e-07, "loss": -0.0171, "num_tokens": 147301401.0, "reward": 0.0, "reward_std": 1.0494153499603271, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.24385843364681847, "rewards/wordcountpos_reward/raw_geo/std": 0.09754619551307539, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1166.125, "completions/mean_terminated_length": 1166.125, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.6733346669333867, "frac_reward_zero_std": 0.0, "grad_norm": 3.4521088987983073, "kl": 0.018402099609375, "learning_rate": 3.6249176096175016e-07, "loss": 0.0133, "num_tokens": 147337371.0, "reward": 2.9802322387695312e-08, "reward_std": 0.791492760181427, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015860173463585477, "rewards/wordcountpos_reward/raw_geo/std": 0.08613090961405154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1206.1875, "completions/mean_terminated_length": 1164.21435546875, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.6735347069413883, "frac_reward_zero_std": 0.0, "grad_norm": 2.9345938480725335, "kl": 0.0142364501953125, "learning_rate": 3.622061564431995e-07, "loss": -0.0179, "num_tokens": 147383150.0, "reward": 0.0, "reward_std": 0.5174683332443237, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13384779428747318, "rewards/wordcountpos_reward/raw_geo/std": 0.06695431678264137, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1207.375, "completions/mean_terminated_length": 1165.571533203125, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.6737347469493898, "frac_reward_zero_std": 0.0, "grad_norm": 3.0496612757083623, "kl": 0.02056884765625, "learning_rate": 3.619206434937797e-07, "loss": 0.0087, "num_tokens": 147417908.0, "reward": 3.725290298461914e-09, "reward_std": 0.9602419137954712, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.02773787327561019, "rewards/wordcountpos_reward/raw_geo/std": 0.17782119815147532, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1232.75, "completions/mean_terminated_length": 1143.666748046875, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.6739347869573915, "frac_reward_zero_std": 0.0, "grad_norm": 3.0972901341714123, "kl": 0.018157958984375, "learning_rate": 3.6163522225270813e-07, "loss": 0.0057, "num_tokens": 147472864.0, "reward": 0.0, "reward_std": 0.990334689617157, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015217018341178437, "rewards/wordcountpos_reward/raw_geo/std": 0.0961271909225397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1222.0625, "completions/mean_terminated_length": 1182.357177734375, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.6741348269653931, "frac_reward_zero_std": 0.0, "grad_norm": 3.334413837300474, "kl": 0.01568603515625, "learning_rate": 3.613498928591578e-07, "loss": -0.0132, "num_tokens": 147521473.0, "reward": 0.0, "reward_std": 0.9880002737045288, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03419448271472868, "rewards/wordcountpos_reward/raw_geo/std": 0.13103425854650386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362769, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1076.8125, "completions/mean_terminated_length": 1076.8125, "completions/min_length": 645.0, "completions/min_terminated_length": 645.0, "epoch": 0.6743348669733947, "frac_reward_zero_std": 0.0, "grad_norm": 3.4250497173618593, "kl": 0.021087646484375, "learning_rate": 3.6106465545225617e-07, "loss": -0.0083, "num_tokens": 147557742.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8835047483444214, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012282247704396568, "rewards/wordcountpos_reward/raw_geo/std": 0.07103000899713321, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1069.0, "completions/max_terminated_length": 1069.0, "completions/mean_length": 948.0, "completions/mean_terminated_length": 948.0, "completions/min_length": 796.0, "completions/min_terminated_length": 796.0, "epoch": 0.6745349069813963, "frac_reward_zero_std": 0.0, "grad_norm": 3.776019340545135, "kl": 0.0163726806640625, "learning_rate": 3.607795101710865e-07, "loss": -0.0188, "num_tokens": 147605502.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7774950265884399, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07194996752109244, "rewards/wordcountpos_reward/raw_geo/std": 0.08247797869634313, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363346, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1063.125, "completions/mean_terminated_length": 1063.125, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.6747349469893978, "frac_reward_zero_std": 0.0, "grad_norm": 3.6173503193473198, "kl": 0.023223876953125, "learning_rate": 3.6049445715468684e-07, "loss": -0.0332, "num_tokens": 147650936.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9630783796310425, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22093682022514816, "rewards/wordcountpos_reward/raw_geo/std": 0.10812731414481883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09339283817414601, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1274.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 1034.375, "completions/mean_terminated_length": 1034.375, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.6749349869973995, "frac_reward_zero_std": 0.0, "grad_norm": 3.770714788112999, "kl": 0.020111083984375, "learning_rate": 3.602094965420506e-07, "loss": -0.025, "num_tokens": 147690598.0, "reward": 0.0, "reward_std": 0.9002038240432739, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.007331105154757003, "rewards/wordcountpos_reward/raw_geo/std": 0.03101522003084131, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1169.75, "completions/mean_terminated_length": 1169.75, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.6751350270054011, "frac_reward_zero_std": 0.0, "grad_norm": 3.1087077218661068, "kl": 0.0152435302734375, "learning_rate": 3.599246284721256e-07, "loss": -0.067, "num_tokens": 147733506.0, "reward": -5.960464477539063e-08, "reward_std": 0.7856888771057129, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18978945450760193, "rewards/wordcountpos_reward/raw_geo/std": 0.22460112417181172, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1055.9375, "completions/mean_terminated_length": 992.5000610351562, "completions/min_length": 621.0, "completions/min_terminated_length": 621.0, "epoch": 0.6753350670134027, "frac_reward_zero_std": 0.0, "grad_norm": 2.66870379856627, "kl": 0.0114288330078125, "learning_rate": 3.596398530838147e-07, "loss": 0.031, "num_tokens": 147765377.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9125845432281494, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20782439889504103, "rewards/wordcountpos_reward/raw_geo/std": 0.14605873960927596, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 861.0, "completions/mean_terminated_length": 861.0, "completions/min_length": 473.0, "completions/min_terminated_length": 473.0, "epoch": 0.6755351070214043, "frac_reward_zero_std": 0.0, "grad_norm": 3.8332288265607057, "kl": 0.02105712890625, "learning_rate": 3.5935517051597606e-07, "loss": -0.0105, "num_tokens": 147798153.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9204510450363159, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10595359052246604, "rewards/wordcountpos_reward/raw_geo/std": 0.07269287464776567, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1230.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 977.875, "completions/mean_terminated_length": 977.875, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.6757351470294058, "frac_reward_zero_std": 0.0, "grad_norm": 3.5499550199330625, "kl": 0.01873779296875, "learning_rate": 3.5907058090742183e-07, "loss": 0.0013, "num_tokens": 147838311.0, "reward": 0.0, "reward_std": 0.39404308795928955, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14124854623903507, "rewards/wordcountpos_reward/raw_geo/std": 0.1464725569131139, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1288.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 979.5, "completions/mean_terminated_length": 979.5, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.6759351870374075, "frac_reward_zero_std": 0.0, "grad_norm": 2.0669669432970275, "kl": 0.01076507568359375, "learning_rate": 3.5878608439691936e-07, "loss": 0.0689, "num_tokens": 147873687.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9540202617645264, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016238060578826312, "rewards/wordcountpos_reward/raw_geo/std": 0.031897598682339004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567835, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1258.375, "completions/mean_terminated_length": 1113.4000244140625, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.6761352270454091, "frac_reward_zero_std": 0.0, "grad_norm": 2.561496590409114, "kl": 0.0131683349609375, "learning_rate": 3.5850168112319035e-07, "loss": -0.014, "num_tokens": 147927989.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0570378303527832, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15767900636072765, "rewards/wordcountpos_reward/raw_geo/std": 0.06406278827428226, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1287.625, "completions/mean_terminated_length": 1238.615478515625, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.6763352670534106, "frac_reward_zero_std": 0.0, "grad_norm": 3.061517329148706, "kl": 0.0156707763671875, "learning_rate": 3.582173712249115e-07, "loss": -0.0056, "num_tokens": 147971455.0, "reward": 0.0, "reward_std": 0.8510395288467407, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04039555352706883, "rewards/wordcountpos_reward/raw_geo/std": 0.08016647887984972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1015.625, "completions/mean_terminated_length": 1015.625, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.6765353070614123, "frac_reward_zero_std": 0.0, "grad_norm": 2.7731730815860507, "kl": 0.0097503662109375, "learning_rate": 3.5793315484071306e-07, "loss": -0.0008, "num_tokens": 148017185.0, "reward": 7.450580596923828e-09, "reward_std": 0.9821830987930298, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.040588497574901544, "rewards/wordcountpos_reward/raw_geo/std": 0.05365702988206016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12285191326386656, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1379.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1124.6875, "completions/mean_terminated_length": 1124.6875, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.6767353470694139, "frac_reward_zero_std": 0.0, "grad_norm": 3.2345566272490696, "kl": 0.0160369873046875, "learning_rate": 3.576490321091805e-07, "loss": -0.0191, "num_tokens": 148053300.0, "reward": -5.960464477539063e-08, "reward_std": 0.8921633958816528, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19086151272471255, "rewards/wordcountpos_reward/raw_geo/std": 0.25890527959401427, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1169.25, "completions/mean_terminated_length": 1147.2000732421875, "completions/min_length": 711.0, "completions/min_terminated_length": 711.0, "epoch": 0.6769353870774155, "frac_reward_zero_std": 0.0, "grad_norm": 2.466829632323017, "kl": 0.011688232421875, "learning_rate": 3.5736500316885333e-07, "loss": 0.0191, "num_tokens": 148107264.0, "reward": 0.0, "reward_std": 0.7304632663726807, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02963066559796835, "rewards/wordcountpos_reward/raw_geo/std": 0.12111456279361577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1135.875, "completions/mean_terminated_length": 1135.875, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.6771354270854171, "frac_reward_zero_std": 0.0, "grad_norm": 3.1136839037381225, "kl": 0.01629638671875, "learning_rate": 3.5708106815822546e-07, "loss": 0.0033, "num_tokens": 148151742.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9837215542793274, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18684722965177827, "rewards/wordcountpos_reward/raw_geo/std": 0.30191340451519744, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1210295341978484, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1288.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 1093.8125, "completions/mean_terminated_length": 1093.8125, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.6773354670934186, "frac_reward_zero_std": 0.0, "grad_norm": 2.686374952578419, "kl": 0.01483154296875, "learning_rate": 3.5679722721574466e-07, "loss": -0.0001, "num_tokens": 148188011.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9900036454200745, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0487148917821085, "rewards/wordcountpos_reward/raw_geo/std": 0.0689475789866928, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1081.9375, "completions/mean_terminated_length": 1081.9375, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.6775355071014203, "frac_reward_zero_std": 0.0, "grad_norm": 2.0207590041444137, "kl": 0.0129241943359375, "learning_rate": 3.565134804798131e-07, "loss": -0.0462, "num_tokens": 148223898.0, "reward": 4.0978193283081055e-08, "reward_std": 1.0457135438919067, "rewards/wordcountpos_reward/mean": 4.0978193283081055e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08888465291958966, "rewards/wordcountpos_reward/raw_geo/std": 0.05688196522025875, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1266.25, "completions/mean_terminated_length": 1266.25, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.6777355471094219, "frac_reward_zero_std": 0.0, "grad_norm": 2.4201780841209644, "kl": 0.013458251953125, "learning_rate": 3.5622982808878723e-07, "loss": -0.0123, "num_tokens": 148267270.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6821728944778442, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013658780402291114, "rewards/wordcountpos_reward/raw_geo/std": 0.04127848767722309, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1103.25, "completions/mean_terminated_length": 1103.25, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.6779355871174235, "frac_reward_zero_std": 0.0, "grad_norm": 2.76233333194087, "kl": 0.015594482421875, "learning_rate": 3.559462701809771e-07, "loss": 0.0158, "num_tokens": 148316090.0, "reward": 0.0, "reward_std": 0.6825799942016602, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10585862056818246, "rewards/wordcountpos_reward/raw_geo/std": 0.14364981299826607, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1153.375, "completions/mean_terminated_length": 1153.375, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.6781356271254251, "frac_reward_zero_std": 0.0, "grad_norm": 3.230168370254695, "kl": 0.0184326171875, "learning_rate": 3.556628068946466e-07, "loss": -0.0068, "num_tokens": 148359104.0, "reward": -7.450580596923828e-09, "reward_std": 1.0447518825531006, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.020230283210499057, "rewards/wordcountpos_reward/raw_geo/std": 0.022443056554528162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1394.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1128.6875, "completions/mean_terminated_length": 1128.6875, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.6783356671334266, "frac_reward_zero_std": 0.0, "grad_norm": 3.3227370791239377, "kl": 0.018768310546875, "learning_rate": 3.553794383680142e-07, "loss": 0.006, "num_tokens": 148398267.0, "reward": 5.960464477539063e-08, "reward_std": 0.9673022031784058, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006545587788228004, "rewards/wordcountpos_reward/raw_geo/std": 0.06976115037210573, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1121.0, "completions/mean_terminated_length": 1121.0, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.6785357071414283, "frac_reward_zero_std": 0.0, "grad_norm": 3.5278105258132375, "kl": 0.0200347900390625, "learning_rate": 3.550961647392512e-07, "loss": 0.0309, "num_tokens": 148444827.0, "reward": 0.0, "reward_std": 0.5587611198425293, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12300073980552242, "rewards/wordcountpos_reward/raw_geo/std": 0.150623164662533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1174.0, "completions/max_terminated_length": 1174.0, "completions/mean_length": 1024.5625, "completions/mean_terminated_length": 1024.5625, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.6787357471494299, "frac_reward_zero_std": 0.0, "grad_norm": 2.700006927387129, "kl": 0.011203765869140625, "learning_rate": 3.5481298614648335e-07, "loss": -0.0292, "num_tokens": 148475396.0, "reward": 0.0, "reward_std": 0.8936270475387573, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13597841225528887, "rewards/wordcountpos_reward/raw_geo/std": 0.13761093270521932, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1278.6875, "completions/mean_terminated_length": 1227.615478515625, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.6789357871574315, "frac_reward_zero_std": 0.0, "grad_norm": 3.1184604720293465, "kl": 0.016754150390625, "learning_rate": 3.545299027277897e-07, "loss": -0.0123, "num_tokens": 148518087.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0048422813415527, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.035613741035123284, "rewards/wordcountpos_reward/raw_geo/std": 0.10634196785884542, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1355.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 1119.1875, "completions/mean_terminated_length": 1119.1875, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.6791358271654331, "frac_reward_zero_std": 0.0, "grad_norm": 3.3292087040354374, "kl": 0.018890380859375, "learning_rate": 3.542469146212034e-07, "loss": 0.011, "num_tokens": 148563898.0, "reward": 0.0, "reward_std": 0.4072377681732178, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17700441696798566, "rewards/wordcountpos_reward/raw_geo/std": 0.18983127983570894, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12285191326386657, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1084.125, "completions/mean_terminated_length": 1084.125, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.6793358671734346, "frac_reward_zero_std": 0.0, "grad_norm": 3.2256938470685914, "kl": 0.02020263671875, "learning_rate": 3.539640219647102e-07, "loss": -0.0062, "num_tokens": 148607724.0, "reward": 0.0, "reward_std": 0.9497401714324951, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.002128183373574017, "rewards/wordcountpos_reward/raw_geo/std": 0.0389696327712577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1153.5, "completions/mean_terminated_length": 1130.4000244140625, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.6795359071814363, "frac_reward_zero_std": 0.0, "grad_norm": 3.5875562061708552, "kl": 0.018585205078125, "learning_rate": 3.5368122489625e-07, "loss": 0.0347, "num_tokens": 148649004.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8486381769180298, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09733218253632932, "rewards/wordcountpos_reward/raw_geo/std": 0.11372267259325801, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1293.0, "completions/max_terminated_length": 1293.0, "completions/mean_length": 1019.0625, "completions/mean_terminated_length": 1019.0625, "completions/min_length": 783.0, "completions/min_terminated_length": 783.0, "epoch": 0.6797359471894379, "frac_reward_zero_std": 0.0, "grad_norm": 2.360528261933643, "kl": 0.0097198486328125, "learning_rate": 3.5339852355371604e-07, "loss": -0.0032, "num_tokens": 148690957.0, "reward": 0.0, "reward_std": 0.7591204643249512, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.037838868381226964, "rewards/wordcountpos_reward/raw_geo/std": 0.11143782081610987, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.102469507659596, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1178.125, "completions/mean_terminated_length": 1132.1429443359375, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.6799359871974395, "frac_reward_zero_std": 0.0, "grad_norm": 2.938244015289169, "kl": 0.0154876708984375, "learning_rate": 3.531159180749549e-07, "loss": -0.0403, "num_tokens": 148737551.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0394388437271118, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007349320714937999, "rewards/wordcountpos_reward/raw_geo/std": 0.08416569442795142, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172842, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1063.8125, "completions/mean_terminated_length": 1063.8125, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.6801360272054411, "frac_reward_zero_std": 0.0, "grad_norm": 3.383688003321384, "kl": 0.0198974609375, "learning_rate": 3.52833408597766e-07, "loss": 0.0343, "num_tokens": 148776876.0, "reward": 0.0, "reward_std": 0.5769611597061157, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03981213998944684, "rewards/wordcountpos_reward/raw_geo/std": 0.17454403888997538, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116195, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1484.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1157.8125, "completions/mean_terminated_length": 1157.8125, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.6803360672134426, "frac_reward_zero_std": 0.0, "grad_norm": 3.193397407218403, "kl": 0.0217437744140625, "learning_rate": 3.525509952599024e-07, "loss": 0.0003, "num_tokens": 148817313.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0545434951782227, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02165894482321264, "rewards/wordcountpos_reward/raw_geo/std": 0.09416216595715829, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857661, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1174.625, "completions/mean_terminated_length": 1152.933349609375, "completions/min_length": 601.0, "completions/min_terminated_length": 601.0, "epoch": 0.6805361072214443, "frac_reward_zero_std": 0.0, "grad_norm": 3.3759463628699904, "kl": 0.021575927734375, "learning_rate": 3.5226867819907015e-07, "loss": 0.0505, "num_tokens": 148871939.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9802390933036804, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19167528944453277, "rewards/wordcountpos_reward/raw_geo/std": 0.08545120473799458, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 919.5625, "completions/mean_terminated_length": 919.5625, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.6807361472294459, "frac_reward_zero_std": 0.0, "grad_norm": 3.348925132778996, "kl": 0.01152801513671875, "learning_rate": 3.519864575529283e-07, "loss": -0.0133, "num_tokens": 148912252.0, "reward": 0.0, "reward_std": 0.7171638011932373, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.030650843595430955, "rewards/wordcountpos_reward/raw_geo/std": 0.11480497952481016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0582141639885766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1117.125, "completions/mean_terminated_length": 1117.125, "completions/min_length": 907.0, "completions/min_terminated_length": 907.0, "epoch": 0.6809361872374475, "frac_reward_zero_std": 0.0, "grad_norm": 3.1318690352182235, "kl": 0.0169525146484375, "learning_rate": 3.517043334590889e-07, "loss": 0.0203, "num_tokens": 148956894.0, "reward": 0.0, "reward_std": 0.9522942900657654, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18535215426808466, "rewards/wordcountpos_reward/raw_geo/std": 0.2356995459011071, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1067.75, "completions/mean_terminated_length": 1067.75, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.6811362272454491, "frac_reward_zero_std": 0.0, "grad_norm": 3.4211005596873663, "kl": 0.016326904296875, "learning_rate": 3.514223060551169e-07, "loss": 0.0108, "num_tokens": 148988578.0, "reward": 0.0, "reward_std": 0.930086612701416, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07575301819764389, "rewards/wordcountpos_reward/raw_geo/std": 0.07025023439032428, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1037.4375, "completions/mean_terminated_length": 1037.4375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.6813362672534506, "frac_reward_zero_std": 0.0, "grad_norm": 3.6726036617110096, "kl": 0.02105712890625, "learning_rate": 3.5114037547853014e-07, "loss": 0.0065, "num_tokens": 149035017.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9978703260421753, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06877739104781816, "rewards/wordcountpos_reward/raw_geo/std": 0.17864277539832024, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.054433105395181744, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1162.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 977.5625, "completions/mean_terminated_length": 977.5625, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.6815363072614523, "frac_reward_zero_std": 0.0, "grad_norm": 2.8627025974041937, "kl": 0.0133819580078125, "learning_rate": 3.5085854186679953e-07, "loss": 0.0018, "num_tokens": 149083002.0, "reward": 0.0, "reward_std": 0.9668846130371094, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05532107140520401, "rewards/wordcountpos_reward/raw_geo/std": 0.07630834886563867, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1329.6875, "completions/mean_terminated_length": 1272.916748046875, "completions/min_length": 1019.0, "completions/min_terminated_length": 1019.0, "epoch": 0.6817363472694539, "frac_reward_zero_std": 0.0, "grad_norm": 2.4777419304409554, "kl": 0.0117950439453125, "learning_rate": 3.505768053573478e-07, "loss": -0.0051, "num_tokens": 149126565.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0393885374069214, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02475568815775464, "rewards/wordcountpos_reward/raw_geo/std": 0.11311060684439539, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 852.4375, "completions/mean_terminated_length": 852.4375, "completions/min_length": 668.0, "completions/min_terminated_length": 668.0, "epoch": 0.6819363872774555, "frac_reward_zero_std": 0.0, "grad_norm": 2.6363787620185732, "kl": 0.00945281982421875, "learning_rate": 3.502951660875517e-07, "loss": 0.0185, "num_tokens": 149175364.0, "reward": 0.0, "reward_std": 0.821591854095459, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13779554703095181, "rewards/wordcountpos_reward/raw_geo/std": 0.10632760574662214, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1210.1875, "completions/mean_terminated_length": 1168.7857666015625, "completions/min_length": 1000.0, "completions/min_terminated_length": 1000.0, "epoch": 0.6821364272854571, "frac_reward_zero_std": 0.0, "grad_norm": 2.243050065537905, "kl": 0.0093994140625, "learning_rate": 3.5001362419473935e-07, "loss": -0.0582, "num_tokens": 149221239.0, "reward": 0.0, "reward_std": 0.7975785732269287, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06319007647829983, "rewards/wordcountpos_reward/raw_geo/std": 0.08309480884386614, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1118.4375, "completions/mean_terminated_length": 1118.4375, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.6823364672934586, "frac_reward_zero_std": 0.0, "grad_norm": 3.0509924209253767, "kl": 0.0160369873046875, "learning_rate": 3.4973217981619207e-07, "loss": -0.0158, "num_tokens": 149255598.0, "reward": -5.960464477539063e-08, "reward_std": 0.380526065826416, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07489216782702666, "rewards/wordcountpos_reward/raw_geo/std": 0.10209324452482854, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1104.875, "completions/mean_terminated_length": 1078.533447265625, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.6825365073014603, "frac_reward_zero_std": 0.0, "grad_norm": 3.176158168463132, "kl": 0.02484130859375, "learning_rate": 3.4945083308914296e-07, "loss": 0.0884, "num_tokens": 149292860.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7227488160133362, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01129546287695385, "rewards/wordcountpos_reward/raw_geo/std": 0.23996097243049475, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1260.5625, "completions/mean_terminated_length": 1205.3077392578125, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.6827365473094619, "frac_reward_zero_std": 0.0, "grad_norm": 2.6486501018176987, "kl": 0.0135498046875, "learning_rate": 3.4916958415077866e-07, "loss": 0.0084, "num_tokens": 149340037.0, "reward": 0.0, "reward_std": 0.5464112758636475, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03701016854539186, "rewards/wordcountpos_reward/raw_geo/std": 0.09822591387035887, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1062.9375, "completions/mean_terminated_length": 1062.9375, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.6829365873174635, "frac_reward_zero_std": 0.0, "grad_norm": 3.5576308972679342, "kl": 0.020660400390625, "learning_rate": 3.488884331382369e-07, "loss": 0.0237, "num_tokens": 149391684.0, "reward": -1.862645149230957e-08, "reward_std": 1.018935203552246, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21584206373400927, "rewards/wordcountpos_reward/raw_geo/std": 0.1327931741552127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1296.0, "completions/max_terminated_length": 1296.0, "completions/mean_length": 1038.875, "completions/mean_terminated_length": 1038.875, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.6831366273254651, "frac_reward_zero_std": 0.0, "grad_norm": 3.282842453374452, "kl": 0.0148162841796875, "learning_rate": 3.4860738018860837e-07, "loss": -0.0217, "num_tokens": 149439466.0, "reward": 0.0, "reward_std": 0.5054129362106323, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03242701628281935, "rewards/wordcountpos_reward/raw_geo/std": 0.1918619475578747, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.0850925422157591, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1226.1875, "completions/mean_terminated_length": 1207.933349609375, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.6833366673334667, "frac_reward_zero_std": 0.0, "grad_norm": 3.248370277483373, "kl": 0.019012451171875, "learning_rate": 3.483264254389358e-07, "loss": 0.0128, "num_tokens": 149490957.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8980104327201843, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0654475082488743, "rewards/wordcountpos_reward/raw_geo/std": 0.07056618823920228, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1140.5, "completions/mean_terminated_length": 1116.533447265625, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.6835367073414683, "frac_reward_zero_std": 0.0, "grad_norm": 2.4647930077321085, "kl": 0.0120391845703125, "learning_rate": 3.4804556902621415e-07, "loss": 0.0063, "num_tokens": 149530613.0, "reward": 0.0, "reward_std": 0.710591197013855, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.043920467915225886, "rewards/wordcountpos_reward/raw_geo/std": 0.1699447526430875, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1168.0, "completions/max_terminated_length": 1168.0, "completions/mean_length": 1043.75, "completions/mean_terminated_length": 1043.75, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.6837367473494699, "frac_reward_zero_std": 0.0, "grad_norm": 2.3416606717252555, "kl": 0.01026153564453125, "learning_rate": 3.4776481108738984e-07, "loss": -0.0121, "num_tokens": 149566353.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9833933115005493, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05532692045069312, "rewards/wordcountpos_reward/raw_geo/std": 0.14531437428723806, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1190.5625, "completions/mean_terminated_length": 1190.5625, "completions/min_length": 1078.0, "completions/min_terminated_length": 1078.0, "epoch": 0.6839367873574715, "frac_reward_zero_std": 0.0, "grad_norm": 3.425611960935296, "kl": 0.023162841796875, "learning_rate": 3.474841517593621e-07, "loss": -0.0306, "num_tokens": 149608474.0, "reward": 0.0, "reward_std": 0.8576502203941345, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08347873309967221, "rewards/wordcountpos_reward/raw_geo/std": 0.07295011399351826, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1089.0, "completions/mean_terminated_length": 1061.60009765625, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.6841368273654731, "frac_reward_zero_std": 0.5, "grad_norm": 1.4160466055848453, "kl": 0.012451171875, "learning_rate": 3.4720359117898144e-07, "loss": -0.0188, "num_tokens": 149640426.0, "reward": 0.0, "reward_std": 0.7302340269088745, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.038651731865008754, "rewards/wordcountpos_reward/raw_geo/std": 0.059671234053390185, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 986.3125, "completions/mean_terminated_length": 986.3125, "completions/min_length": 712.0, "completions/min_terminated_length": 712.0, "epoch": 0.6843368673734747, "frac_reward_zero_std": 0.0, "grad_norm": 3.5586141711645665, "kl": 0.0169677734375, "learning_rate": 3.4692312948305083e-07, "loss": 0.0071, "num_tokens": 149680959.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8187517523765564, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.035888219648341074, "rewards/wordcountpos_reward/raw_geo/std": 0.1748117057534293, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1339.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1112.0, "completions/mean_terminated_length": 1112.0, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.6845369073814763, "frac_reward_zero_std": 0.0, "grad_norm": 3.4565787611383, "kl": 0.0211181640625, "learning_rate": 3.466427668083242e-07, "loss": -0.0111, "num_tokens": 149729663.0, "reward": 0.0, "reward_std": 0.8154862523078918, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.013811799040701785, "rewards/wordcountpos_reward/raw_geo/std": 0.14327663676631963, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1230.375, "completions/mean_terminated_length": 1168.1539306640625, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.6847369473894779, "frac_reward_zero_std": 0.0, "grad_norm": 3.0552605727110773, "kl": 0.01922607421875, "learning_rate": 3.46362503291508e-07, "loss": -0.056, "num_tokens": 149781733.0, "reward": 0.0, "reward_std": 0.948727011680603, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.26636375087917785, "rewards/wordcountpos_reward/raw_geo/std": 0.1522840026543513, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 1123.375, "completions/mean_terminated_length": 1123.375, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.6849369873974795, "frac_reward_zero_std": 0.0, "grad_norm": 3.3746108178064396, "kl": 0.01806640625, "learning_rate": 3.4608233906925987e-07, "loss": -0.0076, "num_tokens": 149823331.0, "reward": 2.9802322387695312e-08, "reward_std": 0.975649356842041, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.170979788118473, "rewards/wordcountpos_reward/raw_geo/std": 0.28399467010050256, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1347.3125, "completions/mean_terminated_length": 1296.416748046875, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.6851370274054811, "frac_reward_zero_std": 0.0, "grad_norm": 2.634471620412403, "kl": 0.0133819580078125, "learning_rate": 3.458022742781893e-07, "loss": -0.0086, "num_tokens": 149869552.0, "reward": 0.0, "reward_std": 0.816162645816803, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03403397789512917, "rewards/wordcountpos_reward/raw_geo/std": 0.11341176241581835, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 946.0, "completions/mean_terminated_length": 946.0, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.6853370674134827, "frac_reward_zero_std": 0.0, "grad_norm": 2.663142383180984, "kl": 0.0163116455078125, "learning_rate": 3.45522309054857e-07, "loss": 0.0082, "num_tokens": 149914520.0, "reward": 0.0, "reward_std": 0.952433705329895, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0611902387225896, "rewards/wordcountpos_reward/raw_geo/std": 0.07135018958474819, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1183.0, "completions/max_terminated_length": 1183.0, "completions/mean_length": 1028.1875, "completions/mean_terminated_length": 1028.1875, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.6855371074214843, "frac_reward_zero_std": 0.0, "grad_norm": 3.50203039742557, "kl": 0.019561767578125, "learning_rate": 3.4524244353577513e-07, "loss": -0.021, "num_tokens": 149953675.0, "reward": 1.4901161193847656e-08, "reward_std": 1.02029550075531, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0928021372669142, "rewards/wordcountpos_reward/raw_geo/std": 0.2117284228492287, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1388.125, "completions/mean_terminated_length": 1321.0, "completions/min_length": 1221.0, "completions/min_terminated_length": 1221.0, "epoch": 0.6857371474294859, "frac_reward_zero_std": 0.0, "grad_norm": 2.9619388100067, "kl": 0.0140228271484375, "learning_rate": 3.449626778574077e-07, "loss": -0.0077, "num_tokens": 150001517.0, "reward": 0.0, "reward_std": 1.0673279762268066, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02189776033217637, "rewards/wordcountpos_reward/raw_geo/std": 0.08808983163501435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1265.9375, "completions/mean_terminated_length": 1232.5, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.6859371874374875, "frac_reward_zero_std": 0.0, "grad_norm": 2.784687375533646, "kl": 0.0134429931640625, "learning_rate": 3.446830121561697e-07, "loss": -0.0045, "num_tokens": 150042500.0, "reward": -2.9802322387695312e-08, "reward_std": 0.648874044418335, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09635873965207002, "rewards/wordcountpos_reward/raw_geo/std": 0.1005921906582272, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1198.6875, "completions/mean_terminated_length": 1198.6875, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.6861372274454891, "frac_reward_zero_std": 0.0, "grad_norm": 3.3303153683334745, "kl": 0.019378662109375, "learning_rate": 3.4440344656842713e-07, "loss": -0.0277, "num_tokens": 150080775.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7720932960510254, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3530577161407387, "rewards/wordcountpos_reward/raw_geo/std": 0.1959877404064125, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1170.4375, "completions/mean_terminated_length": 1170.4375, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.6863372674534907, "frac_reward_zero_std": 0.0, "grad_norm": 2.8499741070135007, "kl": 0.0158233642578125, "learning_rate": 3.441239812304977e-07, "loss": 0.0117, "num_tokens": 150129158.0, "reward": 0.0, "reward_std": 0.8600112199783325, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0061202155613121695, "rewards/wordcountpos_reward/raw_geo/std": 0.07183708803264913, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1057.1875, "completions/mean_terminated_length": 1057.1875, "completions/min_length": 688.0, "completions/min_terminated_length": 688.0, "epoch": 0.6865373074614923, "frac_reward_zero_std": 0.0, "grad_norm": 2.8081211803070616, "kl": 0.01617431640625, "learning_rate": 3.4384461627864997e-07, "loss": 0.021, "num_tokens": 150173393.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0208470821380615, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14781893772367632, "rewards/wordcountpos_reward/raw_geo/std": 0.1165052483855857, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1107.9375, "completions/mean_terminated_length": 1081.800048828125, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.6867373474694939, "frac_reward_zero_std": 0.0, "grad_norm": 3.0347127784563983, "kl": 0.019989013671875, "learning_rate": 3.435653518491034e-07, "loss": -0.0426, "num_tokens": 150221512.0, "reward": 0.0, "reward_std": 0.8833644986152649, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0012286926508754807, "rewards/wordcountpos_reward/raw_geo/std": 0.10036879096912966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1261.1875, "completions/mean_terminated_length": 1181.5833740234375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.6869373874774956, "frac_reward_zero_std": 0.0, "grad_norm": 2.540000316013348, "kl": 0.0153656005859375, "learning_rate": 3.4328618807802856e-07, "loss": 0.0241, "num_tokens": 150273131.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6641878485679626, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1696386207675751, "rewards/wordcountpos_reward/raw_geo/std": 0.16261816327277315, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1382.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1063.9375, "completions/mean_terminated_length": 1063.9375, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.6871374274854971, "frac_reward_zero_std": 0.0, "grad_norm": 3.7227887949834795, "kl": 0.0179901123046875, "learning_rate": 3.4300712510154707e-07, "loss": 0.0338, "num_tokens": 150322098.0, "reward": 0.0, "reward_std": 0.9108538627624512, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.015745082068610514, "rewards/wordcountpos_reward/raw_geo/std": 0.1945109267070758, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970789, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1299.5625, "completions/mean_terminated_length": 1232.75, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.6873374674934987, "frac_reward_zero_std": 0.0, "grad_norm": 3.2306051019747524, "kl": 0.01416015625, "learning_rate": 3.4272816305573154e-07, "loss": 0.0307, "num_tokens": 150360035.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6423726677894592, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17128599884335519, "rewards/wordcountpos_reward/raw_geo/std": 0.18345816708933715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965647, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1231.0, "completions/max_terminated_length": 1231.0, "completions/mean_length": 1079.5625, "completions/mean_terminated_length": 1079.5625, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.6875375075015003, "frac_reward_zero_std": 0.0, "grad_norm": 3.06782860026765, "kl": 0.0156097412109375, "learning_rate": 3.424493020766046e-07, "loss": 0.0067, "num_tokens": 150401812.0, "reward": -5.960464477539063e-08, "reward_std": 0.7236750721931458, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07484054532575514, "rewards/wordcountpos_reward/raw_geo/std": 0.18637467623784604, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567835, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1311.875, "completions/mean_terminated_length": 1199.0, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.6877375475095019, "frac_reward_zero_std": 0.0, "grad_norm": 2.8709453441898125, "kl": 0.02203369140625, "learning_rate": 3.4217054230014073e-07, "loss": -0.0464, "num_tokens": 150456698.0, "reward": -5.960464477539063e-08, "reward_std": 0.8480023145675659, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0234342348365834, "rewards/wordcountpos_reward/raw_geo/std": 0.15487330541458882, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1365.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1201.25, "completions/mean_terminated_length": 1201.25, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.6879375875175034, "frac_reward_zero_std": 0.0, "grad_norm": 2.6332401151913474, "kl": 0.0145263671875, "learning_rate": 3.4189188386226403e-07, "loss": -0.0222, "num_tokens": 150498822.0, "reward": 0.0, "reward_std": 0.44756752252578735, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07301826415849653, "rewards/wordcountpos_reward/raw_geo/std": 0.21528160545538627, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1131.0, "completions/max_terminated_length": 1131.0, "completions/mean_length": 902.5625, "completions/mean_terminated_length": 902.5625, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.6881376275255051, "frac_reward_zero_std": 0.0, "grad_norm": 3.804233802106419, "kl": 0.020172119140625, "learning_rate": 3.4161332689884986e-07, "loss": -0.0437, "num_tokens": 150529167.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9043744802474976, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05816364385265598, "rewards/wordcountpos_reward/raw_geo/std": 0.08701134922632621, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1192.6875, "completions/mean_terminated_length": 1148.7857666015625, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.6883376675335067, "frac_reward_zero_std": 0.0, "grad_norm": 3.164186624807662, "kl": 0.021453857421875, "learning_rate": 3.413348715457235e-07, "loss": -0.0039, "num_tokens": 150577818.0, "reward": 5.960464477539063e-08, "reward_std": 0.5741024613380432, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0348737408420375, "rewards/wordcountpos_reward/raw_geo/std": 0.23932358049621966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1415.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 987.625, "completions/mean_terminated_length": 987.625, "completions/min_length": 695.0, "completions/min_terminated_length": 695.0, "epoch": 0.6885377075415083, "frac_reward_zero_std": 0.0, "grad_norm": 2.977600967289615, "kl": 0.0147552490234375, "learning_rate": 3.410565179386615e-07, "loss": 0.0014, "num_tokens": 150612076.0, "reward": 0.0, "reward_std": 1.0520552396774292, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.030877574937557155, "rewards/wordcountpos_reward/raw_geo/std": 0.10936935073061142, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952499, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 984.3125, "completions/mean_terminated_length": 984.3125, "completions/min_length": 772.0, "completions/min_terminated_length": 772.0, "epoch": 0.6887377475495099, "frac_reward_zero_std": 0.0, "grad_norm": 3.4124399874585807, "kl": 0.01617431640625, "learning_rate": 3.407782662133901e-07, "loss": -0.0526, "num_tokens": 150653009.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9300899505615234, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0850353535210143, "rewards/wordcountpos_reward/raw_geo/std": 0.09150422256576438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1275.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 1112.6875, "completions/mean_terminated_length": 1112.6875, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.6889377875575114, "frac_reward_zero_std": 0.0, "grad_norm": 3.441826870701278, "kl": 0.019805908203125, "learning_rate": 3.405001165055862e-07, "loss": -0.0291, "num_tokens": 150706396.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0336267948150635, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10494375355468724, "rewards/wordcountpos_reward/raw_geo/std": 0.18447635202734353, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1040.5625, "completions/mean_terminated_length": 1009.9334106445312, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.6891378275655131, "frac_reward_zero_std": 0.0, "grad_norm": 3.166031218468166, "kl": 0.0149993896484375, "learning_rate": 3.402220689508767e-07, "loss": -0.0683, "num_tokens": 150751773.0, "reward": -7.450580596923828e-09, "reward_std": 1.0250895023345947, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.3736859688167897, "rewards/wordcountpos_reward/raw_geo/std": 0.09559086828896238, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12641788434189796, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1182.0, "completions/max_terminated_length": 1182.0, "completions/mean_length": 985.6875, "completions/mean_terminated_length": 985.6875, "completions/min_length": 772.0, "completions/min_terminated_length": 772.0, "epoch": 0.6893378675735147, "frac_reward_zero_std": 0.0, "grad_norm": 2.5217979411131153, "kl": 0.0106048583984375, "learning_rate": 3.3994412368483915e-07, "loss": -0.0139, "num_tokens": 150791272.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8966359496116638, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03658949820627975, "rewards/wordcountpos_reward/raw_geo/std": 0.13098883192353225, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202952, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1209.5625, "completions/mean_terminated_length": 1209.5625, "completions/min_length": 1021.0, "completions/min_terminated_length": 1021.0, "epoch": 0.6895379075815163, "frac_reward_zero_std": 0.0, "grad_norm": 2.984053167532367, "kl": 0.0187835693359375, "learning_rate": 3.396662808430006e-07, "loss": -0.0182, "num_tokens": 150838105.0, "reward": 0.0, "reward_std": 0.32339319586753845, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03726235215142727, "rewards/wordcountpos_reward/raw_geo/std": 0.10193669558149136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0825518916489187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1080.625, "completions/mean_terminated_length": 1080.625, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.6897379475895179, "frac_reward_zero_std": 0.0, "grad_norm": 3.41324653532809, "kl": 0.020660400390625, "learning_rate": 3.3938854056083864e-07, "loss": -0.0202, "num_tokens": 150884219.0, "reward": -7.450580596923828e-09, "reward_std": 1.0469999313354492, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.18758474397745542, "rewards/wordcountpos_reward/raw_geo/std": 0.316281342204768, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1018.125, "completions/mean_terminated_length": 906.923095703125, "completions/min_length": 556.0, "completions/min_terminated_length": 556.0, "epoch": 0.6899379875975195, "frac_reward_zero_std": 0.0, "grad_norm": 1.411946822952574, "kl": 0.0087432861328125, "learning_rate": 3.3911090297378054e-07, "loss": 0.0316, "num_tokens": 150923373.0, "reward": 0.0, "reward_std": 0.9671487808227539, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061758920469883516, "rewards/wordcountpos_reward/raw_geo/std": 0.03905140709059813, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1191.6875, "completions/mean_terminated_length": 1171.1334228515625, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.6901380276055211, "frac_reward_zero_std": 0.0, "grad_norm": 3.186271689025614, "kl": 0.0181884765625, "learning_rate": 3.388333682172041e-07, "loss": -0.0192, "num_tokens": 150971456.0, "reward": -2.9802322387695312e-08, "reward_std": 0.993394136428833, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0454658260820055, "rewards/wordcountpos_reward/raw_geo/std": 0.2377927506788769, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1016.875, "completions/mean_terminated_length": 1016.875, "completions/min_length": 778.0, "completions/min_terminated_length": 778.0, "epoch": 0.6903380676135227, "frac_reward_zero_std": 0.0, "grad_norm": 3.145455372667716, "kl": 0.023406982421875, "learning_rate": 3.38555936426436e-07, "loss": -0.0071, "num_tokens": 151001998.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7422385811805725, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05600843459841003, "rewards/wordcountpos_reward/raw_geo/std": 0.06533873057982845, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337807, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1099.1875, "completions/mean_terminated_length": 1099.1875, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.6905381076215243, "frac_reward_zero_std": 0.0, "grad_norm": 2.611554197259232, "kl": 0.011199951171875, "learning_rate": 3.382786077367534e-07, "loss": 0.0312, "num_tokens": 151044681.0, "reward": 0.0, "reward_std": 0.5478124618530273, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06249940843428732, "rewards/wordcountpos_reward/raw_geo/std": 0.07882612598169048, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1272.625, "completions/mean_terminated_length": 1196.8333740234375, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.6907381476295259, "frac_reward_zero_std": 0.0, "grad_norm": 2.8602491889556916, "kl": 0.013031005859375, "learning_rate": 3.380013822833831e-07, "loss": -0.0027, "num_tokens": 151092915.0, "reward": -5.960464477539063e-08, "reward_std": 0.581490695476532, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2714070803128132, "rewards/wordcountpos_reward/raw_geo/std": 0.35094554274284673, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1044.0, "completions/max_terminated_length": 1044.0, "completions/mean_length": 959.1875, "completions/mean_terminated_length": 959.1875, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.6909381876375275, "frac_reward_zero_std": 0.0, "grad_norm": 2.0686385970056667, "kl": 0.00679779052734375, "learning_rate": 3.377242602015016e-07, "loss": 0.0021, "num_tokens": 151126638.0, "reward": 0.0, "reward_std": 0.972115159034729, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.021951183896500772, "rewards/wordcountpos_reward/raw_geo/std": 0.04822623247490337, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1205.125, "completions/mean_terminated_length": 1205.125, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.6911382276455291, "frac_reward_zero_std": 0.0, "grad_norm": 1.6677347495382713, "kl": 0.010272979736328125, "learning_rate": 3.3744724162623464e-07, "loss": -0.0413, "num_tokens": 151165272.0, "reward": 0.0, "reward_std": 0.4919288158416748, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15135641719691248, "rewards/wordcountpos_reward/raw_geo/std": 0.2663390014641001, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1084.0, "completions/max_terminated_length": 1084.0, "completions/mean_length": 919.0, "completions/mean_terminated_length": 919.0, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.6913382676535307, "frac_reward_zero_std": 0.0, "grad_norm": 3.3867790300862497, "kl": 0.01934814453125, "learning_rate": 3.3717032669265776e-07, "loss": -0.0111, "num_tokens": 151197544.0, "reward": 0.0, "reward_std": 0.3451969027519226, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.042698513119437814, "rewards/wordcountpos_reward/raw_geo/std": 0.09909650473140227, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1092.0, "completions/max_terminated_length": 1092.0, "completions/mean_length": 869.4375, "completions/mean_terminated_length": 869.4375, "completions/min_length": 398.0, "completions/min_terminated_length": 398.0, "epoch": 0.6915383076615323, "frac_reward_zero_std": 0.0, "grad_norm": 3.757680561839822, "kl": 0.0157928466796875, "learning_rate": 3.36893515535796e-07, "loss": -0.0241, "num_tokens": 151239351.0, "reward": 0.0, "reward_std": 1.0458213090896606, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04888507622866719, "rewards/wordcountpos_reward/raw_geo/std": 0.06382057522709168, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05692750425533111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1183.75, "completions/mean_terminated_length": 1110.769287109375, "completions/min_length": 755.0, "completions/min_terminated_length": 755.0, "epoch": 0.6917383476695339, "frac_reward_zero_std": 0.0, "grad_norm": 3.1582187306768654, "kl": 0.01727294921875, "learning_rate": 3.366168082906241e-07, "loss": -0.0329, "num_tokens": 151285395.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8724539279937744, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05357892459895744, "rewards/wordcountpos_reward/raw_geo/std": 0.04781863256410362, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1169.0, "completions/max_terminated_length": 1169.0, "completions/mean_length": 920.5, "completions/mean_terminated_length": 920.5, "completions/min_length": 722.0, "completions/min_terminated_length": 722.0, "epoch": 0.6919383876775355, "frac_reward_zero_std": 0.0, "grad_norm": 3.752560709663266, "kl": 0.020416259765625, "learning_rate": 3.3634020509206497e-07, "loss": 0.0353, "num_tokens": 151324563.0, "reward": 0.0, "reward_std": 0.9297708868980408, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03189596722458956, "rewards/wordcountpos_reward/raw_geo/std": 0.03516087523029775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1149.0, "completions/max_terminated_length": 1149.0, "completions/mean_length": 986.5625, "completions/mean_terminated_length": 986.5625, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.6921384276855371, "frac_reward_zero_std": 0.0, "grad_norm": 3.795603199632828, "kl": 0.020660400390625, "learning_rate": 3.360637060749924e-07, "loss": -0.0205, "num_tokens": 151364196.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5813413858413696, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1890271355164101, "rewards/wordcountpos_reward/raw_geo/std": 0.17682233463286753, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.17716909687891083, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1222.75, "completions/mean_terminated_length": 1204.2667236328125, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.6923384676935387, "frac_reward_zero_std": 0.0, "grad_norm": 2.929246093767783, "kl": 0.01708984375, "learning_rate": 3.357873113742281e-07, "loss": -0.0509, "num_tokens": 151418168.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0599696636199951, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026526465544437897, "rewards/wordcountpos_reward/raw_geo/std": 0.08069021750616931, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1249.5, "completions/mean_terminated_length": 1054.6666259765625, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.6925385077015404, "frac_reward_zero_std": 0.0, "grad_norm": 3.011694369335494, "kl": 0.0157928466796875, "learning_rate": 3.3551102112454366e-07, "loss": -0.0437, "num_tokens": 151460200.0, "reward": -2.9802322387695312e-08, "reward_std": 0.47761428356170654, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02624294548124388, "rewards/wordcountpos_reward/raw_geo/std": 0.3111605265829628, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1378.8125, "completions/mean_terminated_length": 1350.84619140625, "completions/min_length": 1223.0, "completions/min_terminated_length": 1223.0, "epoch": 0.6927385477095419, "frac_reward_zero_std": 0.0, "grad_norm": 2.6506229559480126, "kl": 0.017913818359375, "learning_rate": 3.3523483546065885e-07, "loss": 0.0033, "num_tokens": 151512773.0, "reward": 0.0, "reward_std": 1.0037871599197388, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.030232496683434286, "rewards/wordcountpos_reward/raw_geo/std": 0.07511543889930347, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1432.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1103.0625, "completions/mean_terminated_length": 1103.0625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.6929385877175435, "frac_reward_zero_std": 0.0, "grad_norm": 2.8217287725542857, "kl": 0.013427734375, "learning_rate": 3.3495875451724386e-07, "loss": 0.0363, "num_tokens": 151543574.0, "reward": 0.0, "reward_std": 0.541938841342926, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.019490871638737056, "rewards/wordcountpos_reward/raw_geo/std": 0.13853705743319253, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 987.75, "completions/mean_terminated_length": 987.75, "completions/min_length": 727.0, "completions/min_terminated_length": 727.0, "epoch": 0.6931386277255451, "frac_reward_zero_std": 0.0, "grad_norm": 3.3925892354150764, "kl": 0.01617431640625, "learning_rate": 3.3468277842891657e-07, "loss": 0.0013, "num_tokens": 151578890.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0171185731887817, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.027713193803445786, "rewards/wordcountpos_reward/raw_geo/std": 0.169466145266719, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1231.9375, "completions/mean_terminated_length": 1142.5833740234375, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.6933386677335467, "frac_reward_zero_std": 0.0, "grad_norm": 2.9543323269605635, "kl": 0.0156097412109375, "learning_rate": 3.344069073302442e-07, "loss": -0.0567, "num_tokens": 151634753.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0439414978027344, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19368043181618486, "rewards/wordcountpos_reward/raw_geo/std": 0.12137172672046387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1411.3125, "completions/mean_terminated_length": 1297.2857666015625, "completions/min_length": 1219.0, "completions/min_terminated_length": 1219.0, "epoch": 0.6935387077415484, "frac_reward_zero_std": 0.0, "grad_norm": 2.345254188706487, "kl": 0.011127471923828125, "learning_rate": 3.3413114135574277e-07, "loss": -0.0044, "num_tokens": 151685382.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9038494825363159, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06854973051783862, "rewards/wordcountpos_reward/raw_geo/std": 0.0751361433216711, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1287403358472941, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1245.0, "completions/max_terminated_length": 1245.0, "completions/mean_length": 1029.5625, "completions/mean_terminated_length": 1029.5625, "completions/min_length": 748.0, "completions/min_terminated_length": 748.0, "epoch": 0.6937387477495499, "frac_reward_zero_std": 0.0, "grad_norm": 2.9677692923002374, "kl": 0.01617431640625, "learning_rate": 3.3385548063987724e-07, "loss": 0.0014, "num_tokens": 151725039.0, "reward": 2.9802322387695312e-08, "reward_std": 0.84123694896698, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.033394980840131275, "rewards/wordcountpos_reward/raw_geo/std": 0.041085168620780994, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466156, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1208.8125, "completions/mean_terminated_length": 1141.615478515625, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.6939387877575515, "frac_reward_zero_std": 0.0, "grad_norm": 2.843807163986053, "kl": 0.0168914794921875, "learning_rate": 3.335799253170607e-07, "loss": -0.032, "num_tokens": 151768524.0, "reward": 0.0, "reward_std": 0.9624664783477783, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1138660917788958, "rewards/wordcountpos_reward/raw_geo/std": 0.1572586059506469, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1200.4375, "completions/mean_terminated_length": 1180.4666748046875, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.6941388277655531, "frac_reward_zero_std": 0.0, "grad_norm": 3.440326740168738, "kl": 0.018768310546875, "learning_rate": 3.333044755216553e-07, "loss": 0.0005, "num_tokens": 151814899.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9468466639518738, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09232255705082709, "rewards/wordcountpos_reward/raw_geo/std": 0.18000413452395955, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1182.875, "completions/mean_terminated_length": 1109.6923828125, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.6943388677735547, "frac_reward_zero_std": 0.0, "grad_norm": 3.091628669978774, "kl": 0.0152435302734375, "learning_rate": 3.330291313879716e-07, "loss": 0.0155, "num_tokens": 151845033.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9234707355499268, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08609506818035557, "rewards/wordcountpos_reward/raw_geo/std": 0.06859507869187363, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252809, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 1245.9375, "completions/mean_terminated_length": 1093.5, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.6945389077815564, "frac_reward_zero_std": 0.0, "grad_norm": 3.3040562808978167, "kl": 0.0177459716796875, "learning_rate": 3.3275389305026897e-07, "loss": -0.0329, "num_tokens": 151899544.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9894049763679504, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027491005451358304, "rewards/wordcountpos_reward/raw_geo/std": 0.1680510709649838, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1332.0, "completions/mean_terminated_length": 1231.2000732421875, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.6947389477895579, "frac_reward_zero_std": 0.0, "grad_norm": 3.2258541109942698, "kl": 0.01751708984375, "learning_rate": 3.324787606427543e-07, "loss": 0.0437, "num_tokens": 151946448.0, "reward": 0.0, "reward_std": 0.5062788128852844, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22333334541355118, "rewards/wordcountpos_reward/raw_geo/std": 0.3952067704876335, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1173.25, "completions/mean_terminated_length": 1173.25, "completions/min_length": 1054.0, "completions/min_terminated_length": 1054.0, "epoch": 0.6949389877975595, "frac_reward_zero_std": 0.0, "grad_norm": 3.0515071582142457, "kl": 0.016143798828125, "learning_rate": 3.322037342995837e-07, "loss": 0.0149, "num_tokens": 151988252.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9545845985412598, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17212965920245002, "rewards/wordcountpos_reward/raw_geo/std": 0.11398060691568324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043481, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 1093.9375, "completions/mean_terminated_length": 1093.9375, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.6951390278055611, "frac_reward_zero_std": 0.0, "grad_norm": 2.946905907615214, "kl": 0.0156402587890625, "learning_rate": 3.3192881415486125e-07, "loss": -0.0272, "num_tokens": 152029683.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9668072462081909, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00016981465746188779, "rewards/wordcountpos_reward/raw_geo/std": 0.10027261483037568, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1075.625, "completions/mean_terminated_length": 1075.625, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.6953390678135627, "frac_reward_zero_std": 0.0, "grad_norm": 2.9866354745518664, "kl": 0.0093536376953125, "learning_rate": 3.316540003426395e-07, "loss": -0.0007, "num_tokens": 152063885.0, "reward": 5.960464477539063e-08, "reward_std": 0.8853510618209839, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04220586465556665, "rewards/wordcountpos_reward/raw_geo/std": 0.05116160521970754, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1283.0, "completions/mean_terminated_length": 1184.3636474609375, "completions/min_length": 981.0, "completions/min_terminated_length": 981.0, "epoch": 0.6955391078215644, "frac_reward_zero_std": 0.0, "grad_norm": 2.9925915318614784, "kl": 0.0123138427734375, "learning_rate": 3.3137929299691836e-07, "loss": -0.0004, "num_tokens": 152109661.0, "reward": -7.450580596923828e-09, "reward_std": 0.9881343245506287, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03571544896997257, "rewards/wordcountpos_reward/raw_geo/std": 0.08978096908962502, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1128.625, "completions/mean_terminated_length": 1128.625, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.6957391478295659, "frac_reward_zero_std": 0.0, "grad_norm": 2.816679397544655, "kl": 0.013275146484375, "learning_rate": 3.3110469225164684e-07, "loss": 0.0049, "num_tokens": 152154063.0, "reward": 3.725290298461914e-09, "reward_std": 0.981864333152771, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.06180853692734451, "rewards/wordcountpos_reward/raw_geo/std": 0.07235221440393387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1131.3125, "completions/mean_terminated_length": 1131.3125, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.6959391878375675, "frac_reward_zero_std": 0.0, "grad_norm": 2.901185605582683, "kl": 0.0163421630859375, "learning_rate": 3.3083019824072124e-07, "loss": -0.0249, "num_tokens": 152193564.0, "reward": -2.2351741790771484e-08, "reward_std": 1.002884864807129, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0553004134474037, "rewards/wordcountpos_reward/raw_geo/std": 0.09010489599610015, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1173.5, "completions/mean_terminated_length": 1173.5, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.6961392278455691, "frac_reward_zero_std": 0.0, "grad_norm": 2.513923392639029, "kl": 0.0138397216796875, "learning_rate": 3.305558110979865e-07, "loss": -0.0454, "num_tokens": 152229180.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6148116588592529, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013645782734492535, "rewards/wordcountpos_reward/raw_geo/std": 0.0596670806220801, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1138550085106622, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1210.3125, "completions/mean_terminated_length": 1078.6363525390625, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.6963392678535707, "frac_reward_zero_std": 0.0, "grad_norm": 2.718331566044902, "kl": 0.0162353515625, "learning_rate": 3.302815309572343e-07, "loss": -0.011, "num_tokens": 152283761.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6929193735122681, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0320285515653445, "rewards/wordcountpos_reward/raw_geo/std": 0.1427353380636406, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1166.0, "completions/mean_terminated_length": 1166.0, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.6965393078615724, "frac_reward_zero_std": 0.0, "grad_norm": 3.1533439921069344, "kl": 0.02166748046875, "learning_rate": 3.3000735795220566e-07, "loss": 0.0259, "num_tokens": 152331601.0, "reward": 0.0, "reward_std": 0.5229870676994324, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04937274958888502, "rewards/wordcountpos_reward/raw_geo/std": 0.09544172700453117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1294.125, "completions/mean_terminated_length": 1280.4000244140625, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.6967393478695739, "frac_reward_zero_std": 0.0, "grad_norm": 3.110218349493574, "kl": 0.01849365234375, "learning_rate": 3.297332922165883e-07, "loss": 0.0214, "num_tokens": 152385779.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8039785623550415, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1421180740712058, "rewards/wordcountpos_reward/raw_geo/std": 0.1332834578512431, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1224.75, "completions/mean_terminated_length": 1161.2308349609375, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.6969393878775755, "frac_reward_zero_std": 0.0, "grad_norm": 3.4164905416798654, "kl": 0.0184478759765625, "learning_rate": 3.294593338840177e-07, "loss": 0.0044, "num_tokens": 152428367.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8986012935638428, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.123534099727586, "rewards/wordcountpos_reward/raw_geo/std": 0.1416219738449911, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1303.5625, "completions/mean_terminated_length": 1238.0833740234375, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.6971394278855771, "frac_reward_zero_std": 0.0, "grad_norm": 2.5984620404288945, "kl": 0.016326904296875, "learning_rate": 3.2918548308807737e-07, "loss": -0.0578, "num_tokens": 152487088.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6251376271247864, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10131027878208006, "rewards/wordcountpos_reward/raw_geo/std": 0.16530776479451323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1057.8125, "completions/mean_terminated_length": 1057.8125, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.6973394678935787, "frac_reward_zero_std": 0.0, "grad_norm": 3.546572131181195, "kl": 0.0174560546875, "learning_rate": 3.2891173996229804e-07, "loss": -0.0469, "num_tokens": 152531109.0, "reward": 7.450580596923828e-09, "reward_std": 1.06306791305542, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.09793657846037895, "rewards/wordcountpos_reward/raw_geo/std": 0.06087138895706571, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1080.8125, "completions/mean_terminated_length": 1080.8125, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.6975395079015804, "frac_reward_zero_std": 0.0, "grad_norm": 3.3618829717689707, "kl": 0.01800537109375, "learning_rate": 3.286381046401586e-07, "loss": 0.0042, "num_tokens": 152566162.0, "reward": 0.0, "reward_std": 0.7862850427627563, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025806207491864103, "rewards/wordcountpos_reward/raw_geo/std": 0.03938895727420935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1440.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1111.6875, "completions/mean_terminated_length": 1111.6875, "completions/min_length": 797.0, "completions/min_terminated_length": 797.0, "epoch": 0.6977395479095819, "frac_reward_zero_std": 0.0, "grad_norm": 3.0778456830491567, "kl": 0.0136260986328125, "learning_rate": 3.283645772550841e-07, "loss": 0.0231, "num_tokens": 152604325.0, "reward": 0.0, "reward_std": 0.7862340211868286, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2718289561647805, "rewards/wordcountpos_reward/raw_geo/std": 0.3338952821599965, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1223.875, "completions/mean_terminated_length": 1223.875, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.6979395879175835, "frac_reward_zero_std": 0.0, "grad_norm": 2.8200373048232787, "kl": 0.0168304443359375, "learning_rate": 3.280911579404485e-07, "loss": 0.0175, "num_tokens": 152652939.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0637329816818237, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05343976023497288, "rewards/wordcountpos_reward/raw_geo/std": 0.1395418505006246, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666668, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1211.0, "completions/max_terminated_length": 1211.0, "completions/mean_length": 998.9375, "completions/mean_terminated_length": 998.9375, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.6981396279255851, "frac_reward_zero_std": 0.0, "grad_norm": 3.870553291951592, "kl": 0.0218505859375, "learning_rate": 3.278178468295717e-07, "loss": -0.0247, "num_tokens": 152694562.0, "reward": 0.0, "reward_std": 0.6581835746765137, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04894887183683752, "rewards/wordcountpos_reward/raw_geo/std": 0.12027333840612052, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.15341785110291775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1346.0, "completions/max_terminated_length": 1346.0, "completions/mean_length": 1096.625, "completions/mean_terminated_length": 1096.625, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.6983396679335867, "frac_reward_zero_std": 0.0, "grad_norm": 3.696141092933706, "kl": 0.022003173828125, "learning_rate": 3.275446440557218e-07, "loss": 0.0005, "num_tokens": 152741220.0, "reward": 0.0, "reward_std": 1.023130178451538, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11614202784695517, "rewards/wordcountpos_reward/raw_geo/std": 0.11188738688728146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1031.1875, "completions/mean_terminated_length": 1031.1875, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.6985397079415884, "frac_reward_zero_std": 0.0, "grad_norm": 2.510082626950514, "kl": 0.01265716552734375, "learning_rate": 3.2727154975211364e-07, "loss": -0.0047, "num_tokens": 152791479.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0038063526153564, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.034467363380836724, "rewards/wordcountpos_reward/raw_geo/std": 0.1040156909770188, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327549, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1263.5625, "completions/mean_terminated_length": 1156.0909423828125, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.6987397479495899, "frac_reward_zero_std": 0.0, "grad_norm": 3.061561953803008, "kl": 0.01434326171875, "learning_rate": 3.269985640519093e-07, "loss": 0.0402, "num_tokens": 152836760.0, "reward": 0.0, "reward_std": 0.8220562934875488, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1380489328541252, "rewards/wordcountpos_reward/raw_geo/std": 0.21337608325726692, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1196.625, "completions/mean_terminated_length": 1196.625, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.6989397879575915, "frac_reward_zero_std": 0.0, "grad_norm": 3.0987453387487536, "kl": 0.0156707763671875, "learning_rate": 3.267256870882176e-07, "loss": -0.0295, "num_tokens": 152880850.0, "reward": 0.0, "reward_std": 0.749603807926178, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0659819741928078, "rewards/wordcountpos_reward/raw_geo/std": 0.18115912111468188, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116195, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1249.6875, "completions/mean_terminated_length": 1213.9285888671875, "completions/min_length": 1042.0, "completions/min_terminated_length": 1042.0, "epoch": 0.6991398279655932, "frac_reward_zero_std": 0.0, "grad_norm": 2.792065404375109, "kl": 0.01184844970703125, "learning_rate": 3.2645291899409477e-07, "loss": 0.008, "num_tokens": 152926021.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0573031902313232, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05785835184832844, "rewards/wordcountpos_reward/raw_geo/std": 0.07653841256807897, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1321.0, "completions/max_terminated_length": 1321.0, "completions/mean_length": 1197.625, "completions/mean_terminated_length": 1197.625, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.6993398679735947, "frac_reward_zero_std": 0.0, "grad_norm": 1.876098005087311, "kl": 0.00872039794921875, "learning_rate": 3.2618025990254374e-07, "loss": 0.0229, "num_tokens": 152963031.0, "reward": 0.0, "reward_std": 1.013519525527954, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13709673513362788, "rewards/wordcountpos_reward/raw_geo/std": 0.150003455416511, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1073.0, "completions/mean_terminated_length": 1073.0, "completions/min_length": 657.0, "completions/min_terminated_length": 657.0, "epoch": 0.6995399079815963, "frac_reward_zero_std": 0.0, "grad_norm": 3.371273293339633, "kl": 0.016937255859375, "learning_rate": 3.259077099465145e-07, "loss": 0.0003, "num_tokens": 153006311.0, "reward": 0.0, "reward_std": 0.5780473351478577, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.003013002370998168, "rewards/wordcountpos_reward/raw_geo/std": 0.09067968338296131, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1043.75, "completions/mean_terminated_length": 1043.75, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.6997399479895979, "frac_reward_zero_std": 0.0, "grad_norm": 2.353593501193257, "kl": 0.0125732421875, "learning_rate": 3.2563526925890323e-07, "loss": -0.0498, "num_tokens": 153046835.0, "reward": 0.0, "reward_std": 0.8486266732215881, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09710523982992753, "rewards/wordcountpos_reward/raw_geo/std": 0.07467335392746076, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 1029.25, "completions/mean_terminated_length": 1029.25, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.6999399879975995, "frac_reward_zero_std": 0.0, "grad_norm": 3.5095570343320546, "kl": 0.0185394287109375, "learning_rate": 3.2536293797255355e-07, "loss": -0.0276, "num_tokens": 153090199.0, "reward": 0.0, "reward_std": 0.9264613389968872, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0501161878631269, "rewards/wordcountpos_reward/raw_geo/std": 0.05256710494715091, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1294.5625, "completions/mean_terminated_length": 1265.21435546875, "completions/min_length": 1044.0, "completions/min_terminated_length": 1044.0, "epoch": 0.7001400280056012, "frac_reward_zero_std": 0.0, "grad_norm": 2.804285181197797, "kl": 0.0154266357421875, "learning_rate": 3.250907162202554e-07, "loss": -0.0124, "num_tokens": 153143920.0, "reward": -4.470348358154297e-08, "reward_std": 0.9430570602416992, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19445021349088315, "rewards/wordcountpos_reward/raw_geo/std": 0.0920232463524516, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1023.3125, "completions/mean_terminated_length": 1023.3125, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.7003400680136027, "frac_reward_zero_std": 0.0, "grad_norm": 3.684125285544846, "kl": 0.022857666015625, "learning_rate": 3.2481860413474546e-07, "loss": -0.0276, "num_tokens": 153177389.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9840975403785706, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03863790142558504, "rewards/wordcountpos_reward/raw_geo/std": 0.05927841319712092, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1107.625, "completions/mean_terminated_length": 1107.625, "completions/min_length": 718.0, "completions/min_terminated_length": 718.0, "epoch": 0.7005401080216043, "frac_reward_zero_std": 0.0, "grad_norm": 3.0946922612311742, "kl": 0.016937255859375, "learning_rate": 3.245466018487064e-07, "loss": -0.0709, "num_tokens": 153218119.0, "reward": -2.9802322387695312e-08, "reward_std": 0.792001485824585, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.27012689487034064, "rewards/wordcountpos_reward/raw_geo/std": 0.45431913983848693, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115676, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 963.75, "completions/mean_terminated_length": 963.75, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.7007401480296059, "frac_reward_zero_std": 0.0, "grad_norm": 3.632615843952558, "kl": 0.019317626953125, "learning_rate": 3.242747094947682e-07, "loss": -0.0222, "num_tokens": 153251507.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5375641584396362, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1708482206641039, "rewards/wordcountpos_reward/raw_geo/std": 0.1420176119072886, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1352.25, "completions/mean_terminated_length": 1342.4000244140625, "completions/min_length": 1045.0, "completions/min_terminated_length": 1045.0, "epoch": 0.7009401880376075, "frac_reward_zero_std": 0.0, "grad_norm": 2.968187472777881, "kl": 0.01947021484375, "learning_rate": 3.240029272055065e-07, "loss": 0.01, "num_tokens": 153305615.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7430230379104614, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.034473096317124585, "rewards/wordcountpos_reward/raw_geo/std": 0.13950195587656905, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1297.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 985.1875, "completions/mean_terminated_length": 985.1875, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.7011402280456092, "frac_reward_zero_std": 0.0, "grad_norm": 2.7064574451331227, "kl": 0.014739990234375, "learning_rate": 3.23731255113444e-07, "loss": 0.0017, "num_tokens": 153335978.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6063247919082642, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04596747327373357, "rewards/wordcountpos_reward/raw_geo/std": 0.045857556111660334, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1002.5625, "completions/mean_terminated_length": 1002.5625, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.7013402680536107, "frac_reward_zero_std": 0.0, "grad_norm": 3.544149968718183, "kl": 0.0197906494140625, "learning_rate": 3.2345969335104863e-07, "loss": 0.0112, "num_tokens": 153370579.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9495434761047363, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05673741791264559, "rewards/wordcountpos_reward/raw_geo/std": 0.0992005664794046, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 992.6875, "completions/mean_terminated_length": 992.6875, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.7015403080616123, "frac_reward_zero_std": 0.0, "grad_norm": 3.425351825248816, "kl": 0.017608642578125, "learning_rate": 3.231882420507358e-07, "loss": 0.0218, "num_tokens": 153411670.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5243841409683228, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.060874625179216095, "rewards/wordcountpos_reward/raw_geo/std": 0.06518416682922576, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1167460047694551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1303.25, "completions/mean_terminated_length": 1290.1334228515625, "completions/min_length": 1061.0, "completions/min_terminated_length": 1061.0, "epoch": 0.7017403480696139, "frac_reward_zero_std": 0.0, "grad_norm": 3.074365406419901, "kl": 0.019561767578125, "learning_rate": 3.229169013448659e-07, "loss": 0.0395, "num_tokens": 153464034.0, "reward": 0.0, "reward_std": 0.9742124080657959, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05581314223708479, "rewards/wordcountpos_reward/raw_geo/std": 0.1074097932566023, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.061913918736689035, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 758.125, "completions/mean_terminated_length": 758.125, "completions/min_length": 492.0, "completions/min_terminated_length": 492.0, "epoch": 0.7019403880776155, "frac_reward_zero_std": 0.0, "grad_norm": 3.4196691976524214, "kl": 0.01983642578125, "learning_rate": 3.2264567136574614e-07, "loss": -0.0453, "num_tokens": 153491212.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4168107509613037, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07344652073998559, "rewards/wordcountpos_reward/raw_geo/std": 0.09454348174806615, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.15533714826025885, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1088.6875, "completions/mean_terminated_length": 1088.6875, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.7021404280856172, "frac_reward_zero_std": 0.0, "grad_norm": 3.4067886523721342, "kl": 0.020721435546875, "learning_rate": 3.223745522456293e-07, "loss": -0.0038, "num_tokens": 153542863.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7886724472045898, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09323286658560118, "rewards/wordcountpos_reward/raw_geo/std": 0.19319951707749272, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1091.375, "completions/mean_terminated_length": 1091.375, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.7023404680936187, "frac_reward_zero_std": 0.0, "grad_norm": 3.8086400611001507, "kl": 0.022003173828125, "learning_rate": 3.2210354411671457e-07, "loss": 0.0124, "num_tokens": 153596461.0, "reward": 0.0, "reward_std": 0.9451738595962524, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02695806867311326, "rewards/wordcountpos_reward/raw_geo/std": 0.02940749864420032, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.24645636680909816, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1228.1875, "completions/mean_terminated_length": 1210.0667724609375, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.7025405081016203, "frac_reward_zero_std": 0.0, "grad_norm": 2.9968348865844305, "kl": 0.0135345458984375, "learning_rate": 3.2183264711114643e-07, "loss": -0.0055, "num_tokens": 153647448.0, "reward": 0.0, "reward_std": 0.7918832302093506, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07882196081507006, "rewards/wordcountpos_reward/raw_geo/std": 0.08325201835190892, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.30971910810591896, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1167.0, "completions/max_terminated_length": 1167.0, "completions/mean_length": 958.125, "completions/mean_terminated_length": 958.125, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.7027405481096219, "frac_reward_zero_std": 0.0, "grad_norm": 3.754747386322328, "kl": 0.021759033203125, "learning_rate": 3.215618613610157e-07, "loss": -0.0205, "num_tokens": 153693546.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9273447394371033, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06330506159759178, "rewards/wordcountpos_reward/raw_geo/std": 0.08420580843367675, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.4187945664510094, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1256.375, "completions/mean_terminated_length": 1200.1539306640625, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.7029405881176235, "frac_reward_zero_std": 0.0, "grad_norm": 3.234950871795187, "kl": 0.014617919921875, "learning_rate": 3.2129118699835867e-07, "loss": 0.014, "num_tokens": 153729904.0, "reward": 0.0, "reward_std": 0.4225463271141052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.34335247563968546, "rewards/wordcountpos_reward/raw_geo/std": 0.10506745628224476, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.3342210401870351, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1299.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 1019.375, "completions/mean_terminated_length": 1019.375, "completions/min_length": 585.0, "completions/min_terminated_length": 585.0, "epoch": 0.7031406281256252, "frac_reward_zero_std": 0.0, "grad_norm": 2.870293542829498, "kl": 0.0148773193359375, "learning_rate": 3.210206241551576e-07, "loss": -0.0229, "num_tokens": 153761326.0, "reward": 0.0, "reward_std": 0.9000704288482666, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0323946013838442, "rewards/wordcountpos_reward/raw_geo/std": 0.03974046452172262, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.247019267504619, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1231.0, "completions/mean_length": 1037.625, "completions/mean_terminated_length": 1006.800048828125, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.7033406681336267, "frac_reward_zero_std": 0.0, "grad_norm": 2.9700871476305997, "kl": 0.0130615234375, "learning_rate": 3.2075017296333973e-07, "loss": 0.0539, "num_tokens": 153806712.0, "reward": 4.470348358154297e-08, "reward_std": 0.8759257793426514, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08491501167882139, "rewards/wordcountpos_reward/raw_geo/std": 0.0973022768595165, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.24581685443827062, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1147.1875, "completions/mean_terminated_length": 1123.666748046875, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.7035407081416283, "frac_reward_zero_std": 0.0, "grad_norm": 3.219331984566383, "kl": 0.0197296142578125, "learning_rate": 3.204798335547786e-07, "loss": 0.0023, "num_tokens": 153847275.0, "reward": 0.0, "reward_std": 0.8752948045730591, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05410589093549825, "rewards/wordcountpos_reward/raw_geo/std": 0.0643983019462053, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.37515428924742517, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1182.0, "completions/max_terminated_length": 1182.0, "completions/mean_length": 1051.0, "completions/mean_terminated_length": 1051.0, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.70374074814963, "frac_reward_zero_std": 0.0, "grad_norm": 3.5333134568582816, "kl": 0.02081298828125, "learning_rate": 3.2020960606129277e-07, "loss": -0.0089, "num_tokens": 153883147.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5414524078369141, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08563764840977324, "rewards/wordcountpos_reward/raw_geo/std": 0.1626372695560882, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.399606287719782, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1206.0, "completions/max_terminated_length": 1206.0, "completions/mean_length": 941.6875, "completions/mean_terminated_length": 941.6875, "completions/min_length": 609.0, "completions/min_terminated_length": 609.0, "epoch": 0.7039407881576315, "frac_reward_zero_std": 0.0, "grad_norm": 3.9765406176385025, "kl": 0.02325439453125, "learning_rate": 3.1993949061464675e-07, "loss": -0.0745, "num_tokens": 153914358.0, "reward": 0.0, "reward_std": 1.0303186178207397, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.19905035004353608, "rewards/wordcountpos_reward/raw_geo/std": 0.19231323605180461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.38015591343357863, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1184.0, "completions/mean_terminated_length": 1162.933349609375, "completions/min_length": 1015.0, "completions/min_terminated_length": 1015.0, "epoch": 0.7041408281656332, "frac_reward_zero_std": 0.0, "grad_norm": 2.6776839777869097, "kl": 0.013702392578125, "learning_rate": 3.1966948734654955e-07, "loss": 0.0123, "num_tokens": 153961310.0, "reward": -7.450580596923828e-09, "reward_std": 1.006545901298523, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.22911266732539268, "rewards/wordcountpos_reward/raw_geo/std": 0.10831557517821501, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.6333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.445554170128076, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1279.625, "completions/mean_terminated_length": 1179.45458984375, "completions/min_length": 1113.0, "completions/min_terminated_length": 1113.0, "epoch": 0.7043408681736347, "frac_reward_zero_std": 0.0, "grad_norm": 2.182026830761326, "kl": 0.01116943359375, "learning_rate": 3.1939959638865623e-07, "loss": 0.0013, "num_tokens": 154008832.0, "reward": 0.0, "reward_std": 0.8556678891181946, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11025322753937203, "rewards/wordcountpos_reward/raw_geo/std": 0.08789276710947402, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.16324260518672248, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1088.0, "completions/mean_length": 964.875, "completions/mean_terminated_length": 929.2000732421875, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.7045409081816363, "frac_reward_zero_std": 0.0, "grad_norm": 3.0892035102574695, "kl": 0.0172882080078125, "learning_rate": 3.1912981787256686e-07, "loss": 0.0284, "num_tokens": 154045142.0, "reward": 0.0, "reward_std": 0.7378066778182983, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.035632614694504475, "rewards/wordcountpos_reward/raw_geo/std": 0.19513560887598727, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.23333333333333334, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 1075.1875, "completions/mean_terminated_length": 1075.1875, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.704740948189638, "frac_reward_zero_std": 0.0, "grad_norm": 3.4695916818959445, "kl": 0.020965576171875, "learning_rate": 3.188601519298269e-07, "loss": -0.0195, "num_tokens": 154081873.0, "reward": 0.0, "reward_std": 0.8036195635795593, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2743163355853004, "rewards/wordcountpos_reward/raw_geo/std": 0.1655252821621288, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.39015666369065416, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 1110.5, "completions/mean_terminated_length": 1110.5, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.7049409881976395, "frac_reward_zero_std": 0.0, "grad_norm": 2.6016313762363477, "kl": 0.01239013671875, "learning_rate": 3.185905986919262e-07, "loss": -0.0194, "num_tokens": 154128449.0, "reward": 0.0, "reward_std": 0.8935558795928955, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14006988588700964, "rewards/wordcountpos_reward/raw_geo/std": 0.10162595321385569, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.23711225658371654, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1078.8125, "completions/mean_terminated_length": 1078.8125, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.7051410282056412, "frac_reward_zero_std": 0.0, "grad_norm": 3.278395767308563, "kl": 0.0131683349609375, "learning_rate": 3.183211582903003e-07, "loss": -0.016, "num_tokens": 154162686.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6013582348823547, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.029183606875692165, "rewards/wordcountpos_reward/raw_geo/std": 0.08407876879280321, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 1109.1875, "completions/mean_terminated_length": 1083.1334228515625, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.7053410682136427, "frac_reward_zero_std": 0.0, "grad_norm": 2.611050588391774, "kl": 0.01287841796875, "learning_rate": 3.180518308563298e-07, "loss": 0.0135, "num_tokens": 154199409.0, "reward": -5.960464477539063e-08, "reward_std": 0.1337805539369583, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2600286837499566, "rewards/wordcountpos_reward/raw_geo/std": 0.28537922180669234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1124.1875, "completions/mean_terminated_length": 1099.1334228515625, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.7055411082216443, "frac_reward_zero_std": 0.0, "grad_norm": 2.5889143302767668, "kl": 0.012298583984375, "learning_rate": 3.177826165213399e-07, "loss": -0.0011, "num_tokens": 154237540.0, "reward": 2.9802322387695312e-08, "reward_std": 0.3036491572856903, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16097084916796792, "rewards/wordcountpos_reward/raw_geo/std": 0.29863758134314256, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1293.6875, "completions/mean_terminated_length": 1264.21435546875, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.705741148229646, "frac_reward_zero_std": 0.0, "grad_norm": 2.7011296793477917, "kl": 0.01190185546875, "learning_rate": 3.175135154166003e-07, "loss": -0.0076, "num_tokens": 154273039.0, "reward": 0.0, "reward_std": 0.8927574753761292, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03200895033747763, "rewards/wordcountpos_reward/raw_geo/std": 0.06470473522444552, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1383.0, "completions/mean_terminated_length": 1266.0, "completions/min_length": 1144.0, "completions/min_terminated_length": 1144.0, "epoch": 0.7059411882376475, "frac_reward_zero_std": 0.5, "grad_norm": 1.719266800701356, "kl": 0.01373291015625, "learning_rate": 3.172445276733268e-07, "loss": -0.0063, "num_tokens": 154329487.0, "reward": 0.0, "reward_std": 0.5783447027206421, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06951473100020843, "rewards/wordcountpos_reward/raw_geo/std": 0.11149297964283732, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1202.1875, "completions/mean_terminated_length": 1159.6429443359375, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.7061412282456492, "frac_reward_zero_std": 0.0, "grad_norm": 3.3635907644969354, "kl": 0.02130126953125, "learning_rate": 3.1697565342267815e-07, "loss": -0.0129, "num_tokens": 154369978.0, "reward": 7.450580596923828e-09, "reward_std": 1.0506151914596558, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10545942664017503, "rewards/wordcountpos_reward/raw_geo/std": 0.0741885349794315, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1092.0, "completions/max_terminated_length": 1092.0, "completions/mean_length": 908.0, "completions/mean_terminated_length": 908.0, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.7063412682536507, "frac_reward_zero_std": 0.0, "grad_norm": 4.146191915496324, "kl": 0.02740478515625, "learning_rate": 3.1670689279575933e-07, "loss": -0.0129, "num_tokens": 154411282.0, "reward": 0.0, "reward_std": 0.7554078102111816, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04234047738118089, "rewards/wordcountpos_reward/raw_geo/std": 0.1667378899008894, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1370.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1211.375, "completions/mean_terminated_length": 1211.375, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.7065413082616523, "frac_reward_zero_std": 0.0, "grad_norm": 2.5243153669835254, "kl": 0.0144805908203125, "learning_rate": 3.164382459236186e-07, "loss": -0.0033, "num_tokens": 154461384.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9778491854667664, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1049249571662495, "rewards/wordcountpos_reward/raw_geo/std": 0.08524655255231156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1250.3125, "completions/mean_terminated_length": 1250.3125, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.706741348269654, "frac_reward_zero_std": 0.0, "grad_norm": 2.4360403062932967, "kl": 0.01146697998046875, "learning_rate": 3.1616971293724997e-07, "loss": 0.0101, "num_tokens": 154508661.0, "reward": 7.450580596923828e-09, "reward_std": 1.0370798110961914, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.09074120326516356, "rewards/wordcountpos_reward/raw_geo/std": 0.05741460281095797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 967.0, "completions/mean_terminated_length": 967.0, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.7069413882776555, "frac_reward_zero_std": 0.5, "grad_norm": 2.050208553872063, "kl": 0.0180206298828125, "learning_rate": 3.1590129396759104e-07, "loss": -0.0059, "num_tokens": 154540253.0, "reward": 1.4901161193847656e-08, "reward_std": 0.7284266352653503, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04026468550942533, "rewards/wordcountpos_reward/raw_geo/std": 0.1555842910329143, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1150.8125, "completions/mean_terminated_length": 1150.8125, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.7071414282856572, "frac_reward_zero_std": 0.0, "grad_norm": 3.519928383469397, "kl": 0.018890380859375, "learning_rate": 3.1563298914552414e-07, "loss": -0.0578, "num_tokens": 154582594.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6889880895614624, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0714412911853541, "rewards/wordcountpos_reward/raw_geo/std": 0.07506802219686295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1262.25, "completions/mean_terminated_length": 1228.2857666015625, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.7073414682936587, "frac_reward_zero_std": 0.0, "grad_norm": 3.0125854977421267, "kl": 0.0187835693359375, "learning_rate": 3.15364798601876e-07, "loss": -0.0151, "num_tokens": 154630406.0, "reward": 0.0, "reward_std": 0.8438016176223755, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09943354975074035, "rewards/wordcountpos_reward/raw_geo/std": 0.09961595171882545, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1237.9375, "completions/mean_terminated_length": 1220.4666748046875, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.7075415083016603, "frac_reward_zero_std": 0.0, "grad_norm": 2.7738464828454785, "kl": 0.014984130859375, "learning_rate": 3.150967224674177e-07, "loss": 0.0337, "num_tokens": 154673293.0, "reward": 5.960464477539063e-08, "reward_std": 0.4259047508239746, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.31418129544232154, "rewards/wordcountpos_reward/raw_geo/std": 0.25707609271542525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1258.75, "completions/mean_terminated_length": 1203.0770263671875, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.707741548309662, "frac_reward_zero_std": 0.0, "grad_norm": 2.9456087987734287, "kl": 0.0160064697265625, "learning_rate": 3.1482876087286414e-07, "loss": 0.0448, "num_tokens": 154718569.0, "reward": 0.0, "reward_std": 0.21829426288604736, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0671728395085237, "rewards/wordcountpos_reward/raw_geo/std": 0.24095174851098003, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1074.375, "completions/mean_terminated_length": 1046.0, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.7079415883176635, "frac_reward_zero_std": 0.0, "grad_norm": 2.9771205248469985, "kl": 0.0146484375, "learning_rate": 3.1456091394887443e-07, "loss": 0.0535, "num_tokens": 154762343.0, "reward": 0.0, "reward_std": 0.6485716700553894, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10518819302733808, "rewards/wordcountpos_reward/raw_geo/std": 0.13011312355831825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1309.5, "completions/mean_terminated_length": 1161.3333740234375, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.7081416283256652, "frac_reward_zero_std": 0.0, "grad_norm": 2.6665271561969344, "kl": 0.0130767822265625, "learning_rate": 3.1429318182605246e-07, "loss": 0.0096, "num_tokens": 154813319.0, "reward": 5.960464477539063e-08, "reward_std": 0.4359464943408966, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.036821817967061125, "rewards/wordcountpos_reward/raw_geo/std": 0.07092776038627209, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279466, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1111.8125, "completions/mean_terminated_length": 1111.8125, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.7083416683336667, "frac_reward_zero_std": 0.0, "grad_norm": 3.447571302816751, "kl": 0.0174407958984375, "learning_rate": 3.1402556463494523e-07, "loss": -0.0292, "num_tokens": 154854844.0, "reward": 0.0, "reward_std": 0.6580593585968018, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0041653722638611106, "rewards/wordcountpos_reward/raw_geo/std": 0.060974177941570014, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1133.0625, "completions/mean_terminated_length": 1048.3846435546875, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.7085417083416683, "frac_reward_zero_std": 0.0, "grad_norm": 2.699594351217848, "kl": 0.011566162109375, "learning_rate": 3.1375806250604415e-07, "loss": -0.0444, "num_tokens": 154908453.0, "reward": -1.4901161193847656e-08, "reward_std": 0.953021764755249, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026888359794851036, "rewards/wordcountpos_reward/raw_geo/std": 0.13761134097471675, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1196.8125, "completions/mean_terminated_length": 1176.60009765625, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.70874174834967, "frac_reward_zero_std": 0.0, "grad_norm": 2.814994904619091, "kl": 0.0146484375, "learning_rate": 3.1349067556978455e-07, "loss": 0.0216, "num_tokens": 154955362.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6382793188095093, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.031266400851394024, "rewards/wordcountpos_reward/raw_geo/std": 0.15630399873769965, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1362.875, "completions/mean_terminated_length": 1256.2222900390625, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.7089417883576715, "frac_reward_zero_std": 0.0, "grad_norm": 2.4208798386489785, "kl": 0.0112762451171875, "learning_rate": 3.132234039565457e-07, "loss": -0.0585, "num_tokens": 155010696.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6759063005447388, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0575408846320033, "rewards/wordcountpos_reward/raw_geo/std": 0.14367814719833064, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1153.625, "completions/mean_terminated_length": 1153.625, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.7091418283656732, "frac_reward_zero_std": 0.0, "grad_norm": 3.0510376765712857, "kl": 0.0160064697265625, "learning_rate": 3.1295624779665006e-07, "loss": 0.0116, "num_tokens": 155056730.0, "reward": 0.0, "reward_std": 0.587248682975769, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0678313625455608, "rewards/wordcountpos_reward/raw_geo/std": 0.12435060904923141, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1119.1875, "completions/mean_terminated_length": 1064.7857666015625, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.7093418683736747, "frac_reward_zero_std": 0.0, "grad_norm": 2.631690865299317, "kl": 0.0117034912109375, "learning_rate": 3.1268920722036447e-07, "loss": 0.001, "num_tokens": 155092909.0, "reward": 7.450580596923828e-09, "reward_std": 1.0576794147491455, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.054528023822764685, "rewards/wordcountpos_reward/raw_geo/std": 0.07500187585776584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1433.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1018.5625, "completions/mean_terminated_length": 1018.5625, "completions/min_length": 729.0, "completions/min_terminated_length": 729.0, "epoch": 0.7095419083816763, "frac_reward_zero_std": 0.0, "grad_norm": 3.3194090202535484, "kl": 0.0161285400390625, "learning_rate": 3.12422282357899e-07, "loss": 0.0262, "num_tokens": 155145494.0, "reward": 0.0, "reward_std": 0.815845251083374, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10855983807591763, "rewards/wordcountpos_reward/raw_geo/std": 0.10133260636447242, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1234.25, "completions/mean_terminated_length": 1234.25, "completions/min_length": 1064.0, "completions/min_terminated_length": 1064.0, "epoch": 0.709741948389678, "frac_reward_zero_std": 0.0, "grad_norm": 2.7437627816163723, "kl": 0.01357269287109375, "learning_rate": 3.1215547333940764e-07, "loss": -0.0281, "num_tokens": 155188154.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0309381484985352, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12318770753655926, "rewards/wordcountpos_reward/raw_geo/std": 0.1308206239837414, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1492.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1099.0625, "completions/mean_terminated_length": 1099.0625, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.7099419883976795, "frac_reward_zero_std": 0.0, "grad_norm": 2.944196063485162, "kl": 0.018402099609375, "learning_rate": 3.118887802949873e-07, "loss": -0.02, "num_tokens": 155239675.0, "reward": 0.0, "reward_std": 1.007983684539795, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2810250575129999, "rewards/wordcountpos_reward/raw_geo/std": 0.15970547340844293, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 991.9375, "completions/mean_terminated_length": 991.9375, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.7101420284056812, "frac_reward_zero_std": 0.0, "grad_norm": 2.6347738501748084, "kl": 0.013092041015625, "learning_rate": 3.11622203354679e-07, "loss": -0.0133, "num_tokens": 155280570.0, "reward": 7.450580596923828e-09, "reward_std": 1.0187921524047852, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.15068570280268237, "rewards/wordcountpos_reward/raw_geo/std": 0.07407127571593744, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1134476547592341, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1266.1875, "completions/mean_terminated_length": 1266.1875, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.7103420684136827, "frac_reward_zero_std": 0.0, "grad_norm": 3.4739948653780446, "kl": 0.02105712890625, "learning_rate": 3.113557426484667e-07, "loss": -0.0395, "num_tokens": 155324301.0, "reward": 0.0, "reward_std": 0.8053336143493652, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11928599117593103, "rewards/wordcountpos_reward/raw_geo/std": 0.17879160394414115, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1182.5, "completions/mean_terminated_length": 1182.5, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.7105421084216843, "frac_reward_zero_std": 0.0, "grad_norm": 2.3774307484746537, "kl": 0.0149688720703125, "learning_rate": 3.110893983062782e-07, "loss": -0.0134, "num_tokens": 155362333.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0492079257965088, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23750701291315282, "rewards/wordcountpos_reward/raw_geo/std": 0.10865570835889277, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1137.6875, "completions/mean_terminated_length": 1137.6875, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.710742148429686, "frac_reward_zero_std": 0.0, "grad_norm": 2.660279545898054, "kl": 0.012725830078125, "learning_rate": 3.1082317045798376e-07, "loss": 0.0062, "num_tokens": 155404808.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9188191890716553, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1583183995889882, "rewards/wordcountpos_reward/raw_geo/std": 0.14861819655989028, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1388.6875, "completions/mean_terminated_length": 1351.5833740234375, "completions/min_length": 1126.0, "completions/min_terminated_length": 1126.0, "epoch": 0.7109421884376875, "frac_reward_zero_std": 0.0, "grad_norm": 2.3481136613817055, "kl": 0.00955963134765625, "learning_rate": 3.1055705923339746e-07, "loss": 0.0055, "num_tokens": 155450035.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9734987616539001, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.031157004502885815, "rewards/wordcountpos_reward/raw_geo/std": 0.04984074888632834, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1188.0, "completions/max_terminated_length": 1188.0, "completions/mean_length": 960.8125, "completions/mean_terminated_length": 960.8125, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.7111422284456891, "frac_reward_zero_std": 0.0, "grad_norm": 3.1795347295697702, "kl": 0.0201416015625, "learning_rate": 3.102910647622764e-07, "loss": -0.0083, "num_tokens": 155487752.0, "reward": 3.725290298461914e-09, "reward_std": 0.8326483368873596, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1110956686787055, "rewards/wordcountpos_reward/raw_geo/std": 0.23484938684939408, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1190.9375, "completions/mean_terminated_length": 1190.9375, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.7113422684536908, "frac_reward_zero_std": 0.0, "grad_norm": 3.211839603343413, "kl": 0.018798828125, "learning_rate": 3.100251871743208e-07, "loss": 0.0058, "num_tokens": 155536447.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6554089784622192, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07383347960648884, "rewards/wordcountpos_reward/raw_geo/std": 0.07408146059787452, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1288.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 944.125, "completions/mean_terminated_length": 944.125, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.7115423084616923, "frac_reward_zero_std": 0.0, "grad_norm": 2.6642535737812065, "kl": 0.011688232421875, "learning_rate": 3.0975942659917334e-07, "loss": -0.0117, "num_tokens": 155571809.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9480444192886353, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.006219993662089387, "rewards/wordcountpos_reward/raw_geo/std": 0.12594847731869602, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1362.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1200.875, "completions/mean_terminated_length": 1200.875, "completions/min_length": 1110.0, "completions/min_terminated_length": 1110.0, "epoch": 0.711742348469694, "frac_reward_zero_std": 0.0, "grad_norm": 2.112210592318033, "kl": 0.010661125183105469, "learning_rate": 3.094937831664207e-07, "loss": 0.0142, "num_tokens": 155614695.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0686531066894531, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06626020151537362, "rewards/wordcountpos_reward/raw_geo/std": 0.057582633683270705, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1257.8125, "completions/mean_terminated_length": 1241.666748046875, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.7119423884776955, "frac_reward_zero_std": 0.0, "grad_norm": 3.1738219504416083, "kl": 0.018341064453125, "learning_rate": 3.092282570055913e-07, "loss": -0.0442, "num_tokens": 155668468.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0405099391937256, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012168350517718984, "rewards/wordcountpos_reward/raw_geo/std": 0.11282322984791388, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869924, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1226.1875, "completions/mean_terminated_length": 1134.916748046875, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.7121424284856971, "frac_reward_zero_std": 0.0, "grad_norm": 3.087729925375661, "kl": 0.014892578125, "learning_rate": 3.089628482461573e-07, "loss": -0.0845, "num_tokens": 155714039.0, "reward": 1.4901161193847656e-08, "reward_std": 1.064027190208435, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04515155160356789, "rewards/wordcountpos_reward/raw_geo/std": 0.08889640342716369, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 1000.1875, "completions/mean_terminated_length": 1000.1875, "completions/min_length": 644.0, "completions/min_terminated_length": 644.0, "epoch": 0.7123424684936988, "frac_reward_zero_std": 0.0, "grad_norm": 3.583319917902798, "kl": 0.025360107421875, "learning_rate": 3.086975570175327e-07, "loss": -0.012, "num_tokens": 155757490.0, "reward": -3.3527612686157227e-08, "reward_std": 0.9808301329612732, "rewards/wordcountpos_reward/mean": -3.3527612686157227e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06689275920312505, "rewards/wordcountpos_reward/raw_geo/std": 0.13144703493426443, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1218.375, "completions/mean_terminated_length": 1090.3636474609375, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.7125425085017003, "frac_reward_zero_std": 0.0, "grad_norm": 3.2273277857519798, "kl": 0.018768310546875, "learning_rate": 3.0843238344907535e-07, "loss": -0.0054, "num_tokens": 155810376.0, "reward": 0.0, "reward_std": 0.9119106531143188, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22821541041729737, "rewards/wordcountpos_reward/raw_geo/std": 0.19160342719422951, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1114.6875, "completions/mean_terminated_length": 1089.0, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.712742548509702, "frac_reward_zero_std": 0.0, "grad_norm": 2.9283286869996688, "kl": 0.0156402587890625, "learning_rate": 3.0816732767008457e-07, "loss": -0.0628, "num_tokens": 155847067.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8242074251174927, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16774106504414713, "rewards/wordcountpos_reward/raw_geo/std": 0.10660626225237026, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0787635937708768, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1233.0625, "completions/mean_terminated_length": 1215.2667236328125, "completions/min_length": 644.0, "completions/min_terminated_length": 644.0, "epoch": 0.7129425885177035, "frac_reward_zero_std": 0.0, "grad_norm": 22.538411479232426, "kl": 0.042327880859375, "learning_rate": 3.07902389809803e-07, "loss": -0.0736, "num_tokens": 155896900.0, "reward": 0.0, "reward_std": 1.04718816280365, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.24368294731958637, "rewards/wordcountpos_reward/raw_geo/std": 0.23478020898369226, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.13492110177323527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1090.0, "completions/max_terminated_length": 1090.0, "completions/mean_length": 902.125, "completions/mean_terminated_length": 902.125, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.7131426285257051, "frac_reward_zero_std": 0.0, "grad_norm": 3.930779735011287, "kl": 0.0175018310546875, "learning_rate": 3.0763756999741554e-07, "loss": 0.0272, "num_tokens": 155934198.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9898998737335205, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10715745709623559, "rewards/wordcountpos_reward/raw_geo/std": 0.18704586875051538, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1411.9375, "completions/mean_terminated_length": 1323.875, "completions/min_length": 1082.0, "completions/min_terminated_length": 1082.0, "epoch": 0.7133426685337068, "frac_reward_zero_std": 0.0, "grad_norm": 2.9972571827760266, "kl": 0.0171051025390625, "learning_rate": 3.073728683620496e-07, "loss": 0.0205, "num_tokens": 155992765.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9914847612380981, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20445960998565396, "rewards/wordcountpos_reward/raw_geo/std": 0.07203448918397665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1275.75, "completions/mean_terminated_length": 1224.0, "completions/min_length": 1078.0, "completions/min_terminated_length": 1078.0, "epoch": 0.7135427085417083, "frac_reward_zero_std": 0.0, "grad_norm": 2.884493649097003, "kl": 0.014984130859375, "learning_rate": 3.0710828503277464e-07, "loss": -0.0187, "num_tokens": 156042497.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8704541921615601, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1017265947385466, "rewards/wordcountpos_reward/raw_geo/std": 0.08461770994416663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1153.75, "completions/mean_terminated_length": 1130.666748046875, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.71374274854971, "frac_reward_zero_std": 0.0, "grad_norm": 3.402592487386956, "kl": 0.0185546875, "learning_rate": 3.068438201386028e-07, "loss": -0.0271, "num_tokens": 156092253.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9286425113677979, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14045193405442968, "rewards/wordcountpos_reward/raw_geo/std": 0.12187489468811774, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1351.6875, "completions/mean_terminated_length": 1317.4615478515625, "completions/min_length": 1143.0, "completions/min_terminated_length": 1143.0, "epoch": 0.7139427885577115, "frac_reward_zero_std": 0.0, "grad_norm": 2.4083485585874667, "kl": 0.0127410888671875, "learning_rate": 3.065794738084885e-07, "loss": -0.0167, "num_tokens": 156141104.0, "reward": 0.0, "reward_std": 0.788641631603241, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.005087442452949663, "rewards/wordcountpos_reward/raw_geo/std": 0.09719932714223668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1192569587999888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1144.5625, "completions/mean_terminated_length": 1144.5625, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.7141428285657131, "frac_reward_zero_std": 0.0, "grad_norm": 3.299427490236205, "kl": 0.017730712890625, "learning_rate": 3.063152461713282e-07, "loss": 0.0167, "num_tokens": 156180409.0, "reward": 2.9802322387695312e-08, "reward_std": 0.971190333366394, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.060604076277108496, "rewards/wordcountpos_reward/raw_geo/std": 0.15352594345396464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1406.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1191.0625, "completions/mean_terminated_length": 1191.0625, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.7143428685737148, "frac_reward_zero_std": 0.0, "grad_norm": 2.7867485118264566, "kl": 0.012542724609375, "learning_rate": 3.0605113735596026e-07, "loss": 0.0102, "num_tokens": 156218538.0, "reward": 0.0, "reward_std": 0.7993193864822388, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.00056235215890558, "rewards/wordcountpos_reward/raw_geo/std": 0.41743790979804135, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1063.75, "completions/mean_terminated_length": 1063.75, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.7145429085817163, "frac_reward_zero_std": 0.0, "grad_norm": 3.19339305948437, "kl": 0.017486572265625, "learning_rate": 3.057871474911655e-07, "loss": -0.0482, "num_tokens": 156264742.0, "reward": 0.0, "reward_std": 0.8357334733009338, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.035943062776556366, "rewards/wordcountpos_reward/raw_geo/std": 0.057632877832748806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1220.8125, "completions/mean_terminated_length": 1220.8125, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.714742948589718, "frac_reward_zero_std": 0.0, "grad_norm": 2.7352884772984773, "kl": 0.011932373046875, "learning_rate": 3.055232767056666e-07, "loss": 0.0199, "num_tokens": 156305619.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8884719014167786, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06497597746754895, "rewards/wordcountpos_reward/raw_geo/std": 0.0697952603589734, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1290.1875, "completions/mean_terminated_length": 1127.0, "completions/min_length": 560.0, "completions/min_terminated_length": 560.0, "epoch": 0.7149429885977195, "frac_reward_zero_std": 0.0, "grad_norm": 2.832513725726322, "kl": 0.0139617919921875, "learning_rate": 3.052595251281283e-07, "loss": -0.0441, "num_tokens": 156357558.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7567921876907349, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.039014947215919665, "rewards/wordcountpos_reward/raw_geo/std": 0.080056806676942, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1253.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 977.625, "completions/mean_terminated_length": 977.625, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.7151430286057211, "frac_reward_zero_std": 0.0, "grad_norm": 3.446321446688001, "kl": 0.01959228515625, "learning_rate": 3.0499589288715676e-07, "loss": 0.03, "num_tokens": 156390944.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6171700358390808, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.054512755657757005, "rewards/wordcountpos_reward/raw_geo/std": 0.08543229016759787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1119.0, "completions/mean_terminated_length": 1119.0, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.7153430686137228, "frac_reward_zero_std": 0.0, "grad_norm": 2.983140264871891, "kl": 0.0157012939453125, "learning_rate": 3.047323801113003e-07, "loss": -0.0075, "num_tokens": 156439592.0, "reward": 0.0, "reward_std": 0.9218766093254089, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12640786063948592, "rewards/wordcountpos_reward/raw_geo/std": 0.07475356636944479, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1133.0, "completions/mean_terminated_length": 1133.0, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.7155431086217243, "frac_reward_zero_std": 0.0, "grad_norm": 3.895095192093312, "kl": 0.02642822265625, "learning_rate": 3.044689869290491e-07, "loss": -0.0139, "num_tokens": 156490528.0, "reward": -1.862645149230957e-08, "reward_std": 1.0651593208312988, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.024149561509822958, "rewards/wordcountpos_reward/raw_geo/std": 0.20864413411664387, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1228.9375, "completions/mean_terminated_length": 1138.5833740234375, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.715743148629726, "frac_reward_zero_std": 0.0, "grad_norm": 3.4221214579584145, "kl": 0.019287109375, "learning_rate": 3.0420571346883497e-07, "loss": -0.0474, "num_tokens": 156537199.0, "reward": 2.9802322387695312e-08, "reward_std": 0.1900852471590042, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2683240964268842, "rewards/wordcountpos_reward/raw_geo/std": 0.12894382866365842, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242308, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1128.4375, "completions/mean_terminated_length": 1128.4375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.7159431886377275, "frac_reward_zero_std": 0.0, "grad_norm": 3.2481239589276374, "kl": 0.0143280029296875, "learning_rate": 3.0394255985903067e-07, "loss": -0.0572, "num_tokens": 156579870.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8996506929397583, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.026632116232721176, "rewards/wordcountpos_reward/raw_geo/std": 0.04539445138189454, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1165.5625, "completions/mean_terminated_length": 964.9000244140625, "completions/min_length": 585.0, "completions/min_terminated_length": 585.0, "epoch": 0.7161432286457291, "frac_reward_zero_std": 0.0, "grad_norm": 2.9843629826631597, "kl": 0.0158843994140625, "learning_rate": 3.036795262279519e-07, "loss": -0.048, "num_tokens": 156620991.0, "reward": 0.0, "reward_std": 0.7253214716911316, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1350712518388378, "rewards/wordcountpos_reward/raw_geo/std": 0.16201925422331737, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1480740555462905, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1465.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1146.0, "completions/mean_terminated_length": 1146.0, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.7163432686537308, "frac_reward_zero_std": 0.0, "grad_norm": 3.0250425704741666, "kl": 0.016998291015625, "learning_rate": 3.0341661270385446e-07, "loss": -0.0156, "num_tokens": 156672535.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0072288513183594, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.005194014971423707, "rewards/wordcountpos_reward/raw_geo/std": 0.11277213694168263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1146.625, "completions/mean_terminated_length": 1146.625, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.7165433086617323, "frac_reward_zero_std": 0.0, "grad_norm": 3.5659394181023023, "kl": 0.02374267578125, "learning_rate": 3.0315381941493645e-07, "loss": -0.0046, "num_tokens": 156726377.0, "reward": 0.0, "reward_std": 1.0251020193099976, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1688478338817813, "rewards/wordcountpos_reward/raw_geo/std": 0.23024219475762692, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1125.125, "completions/mean_terminated_length": 1071.571533203125, "completions/min_length": 645.0, "completions/min_terminated_length": 645.0, "epoch": 0.716743348669734, "frac_reward_zero_std": 0.0, "grad_norm": 3.2445090720585377, "kl": 0.018524169921875, "learning_rate": 3.0289114648933687e-07, "loss": 0.0196, "num_tokens": 156772227.0, "reward": 0.0, "reward_std": 0.8910329937934875, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17457376475115516, "rewards/wordcountpos_reward/raw_geo/std": 0.06538928570260807, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1056.5625, "completions/mean_terminated_length": 1056.5625, "completions/min_length": 688.0, "completions/min_terminated_length": 688.0, "epoch": 0.7169433886777355, "frac_reward_zero_std": 0.0, "grad_norm": 2.9983615875673815, "kl": 0.0136260986328125, "learning_rate": 3.0262859405513664e-07, "loss": -0.0225, "num_tokens": 156827372.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6894912719726562, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.029401142423427964, "rewards/wordcountpos_reward/raw_geo/std": 0.11481712137560314, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1353.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 1114.25, "completions/mean_terminated_length": 1114.25, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.7171434286857371, "frac_reward_zero_std": 0.0, "grad_norm": 2.3327375303983193, "kl": 0.0118408203125, "learning_rate": 3.02366162240357e-07, "loss": 0.013, "num_tokens": 156874592.0, "reward": 0.0, "reward_std": 0.7793896794319153, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11323236165200667, "rewards/wordcountpos_reward/raw_geo/std": 0.2237696639057499, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11013459778666117, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1170.9375, "completions/mean_terminated_length": 1123.9285888671875, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.7173434686937388, "frac_reward_zero_std": 0.0, "grad_norm": 3.136649677974457, "kl": 0.017974853515625, "learning_rate": 3.0210385117296125e-07, "loss": 0.0317, "num_tokens": 156924679.0, "reward": 0.0, "reward_std": 0.9297451376914978, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17972627594323584, "rewards/wordcountpos_reward/raw_geo/std": 0.1440516236719604, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1303.4375, "completions/mean_terminated_length": 1275.357177734375, "completions/min_length": 1041.0, "completions/min_terminated_length": 1041.0, "epoch": 0.7175435087017403, "frac_reward_zero_std": 0.0, "grad_norm": 2.781761814499393, "kl": 0.01507568359375, "learning_rate": 3.018416609808532e-07, "loss": -0.0066, "num_tokens": 156974670.0, "reward": 0.0, "reward_std": 0.7170824408531189, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05801101983603098, "rewards/wordcountpos_reward/raw_geo/std": 0.28778696114871843, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1471.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1088.3125, "completions/mean_terminated_length": 1088.3125, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.717743548709742, "frac_reward_zero_std": 0.0, "grad_norm": 3.5763534197954923, "kl": 0.020965576171875, "learning_rate": 3.015795917918783e-07, "loss": -0.0076, "num_tokens": 157019483.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7880460023880005, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06475369047168347, "rewards/wordcountpos_reward/raw_geo/std": 0.04556013588373256, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346311, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1184.0, "completions/mean_terminated_length": 1040.3636474609375, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.7179435887177436, "frac_reward_zero_std": 0.0, "grad_norm": 1.7306968462361743, "kl": 0.004543304443359375, "learning_rate": 3.013176437338224e-07, "loss": 0.0184, "num_tokens": 157063115.0, "reward": 0.0, "reward_std": 0.9260464906692505, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1701397354311728, "rewards/wordcountpos_reward/raw_geo/std": 0.13954298829039152, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668906, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1091.4375, "completions/mean_terminated_length": 1033.071533203125, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.7181436287257451, "frac_reward_zero_std": 0.0, "grad_norm": 3.4473077814496498, "kl": 0.020751953125, "learning_rate": 3.0105581693441264e-07, "loss": 0.0247, "num_tokens": 157103666.0, "reward": -2.2351741790771484e-08, "reward_std": 1.058680772781372, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0708936681112323, "rewards/wordcountpos_reward/raw_geo/std": 0.12090114807201599, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8500000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1210.0, "completions/max_terminated_length": 1210.0, "completions/mean_length": 1114.8125, "completions/mean_terminated_length": 1114.8125, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.7183436687337468, "frac_reward_zero_std": 0.0, "grad_norm": 3.322084781189852, "kl": 0.019775390625, "learning_rate": 3.0079411152131714e-07, "loss": -0.0139, "num_tokens": 157140527.0, "reward": 0.0, "reward_std": 0.9146561622619629, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08781882063196254, "rewards/wordcountpos_reward/raw_geo/std": 0.09170541119390917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1085.0625, "completions/mean_terminated_length": 1085.0625, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.7185437087417483, "frac_reward_zero_std": 0.0, "grad_norm": 2.878113600266532, "kl": 0.0248870849609375, "learning_rate": 3.0053252762214434e-07, "loss": 0.0056, "num_tokens": 157182216.0, "reward": -1.4901161193847656e-08, "reward_std": 1.021111249923706, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10886462304552753, "rewards/wordcountpos_reward/raw_geo/std": 0.1218359444416438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1143.0, "completions/max_terminated_length": 1143.0, "completions/mean_length": 817.125, "completions/mean_terminated_length": 817.125, "completions/min_length": 625.0, "completions/min_terminated_length": 625.0, "epoch": 0.71874374874975, "frac_reward_zero_std": 0.0, "grad_norm": 4.460188376810746, "kl": 0.03448486328125, "learning_rate": 3.002710653644438e-07, "loss": -0.1051, "num_tokens": 157220786.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9965775609016418, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0595504009246194, "rewards/wordcountpos_reward/raw_geo/std": 0.1256126491730458, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1071.125, "completions/mean_terminated_length": 972.1538696289062, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.7189437887577516, "frac_reward_zero_std": 0.0, "grad_norm": 3.184215044874558, "kl": 0.016448974609375, "learning_rate": 3.000097248757058e-07, "loss": -0.0147, "num_tokens": 157264332.0, "reward": 0.0, "reward_std": 0.9640743732452393, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027776237573395337, "rewards/wordcountpos_reward/raw_geo/std": 0.28056237881001983, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1170.0, "completions/max_terminated_length": 1170.0, "completions/mean_length": 967.0625, "completions/mean_terminated_length": 967.0625, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.7191438287657531, "frac_reward_zero_std": 0.0, "grad_norm": 3.177332499171931, "kl": 0.013641357421875, "learning_rate": 2.997485062833613e-07, "loss": -0.0581, "num_tokens": 157305741.0, "reward": 0.0, "reward_std": 0.780616044998169, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03100604088015782, "rewards/wordcountpos_reward/raw_geo/std": 0.07310473331279466, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 986.8125, "completions/mean_terminated_length": 986.8125, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.7193438687737548, "frac_reward_zero_std": 0.0, "grad_norm": 2.4888258739394735, "kl": 0.010650634765625, "learning_rate": 2.994874097147814e-07, "loss": -0.0127, "num_tokens": 157344754.0, "reward": 0.0, "reward_std": 1.043357491493225, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11563461813904906, "rewards/wordcountpos_reward/raw_geo/std": 0.08622290217162437, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.12583057392117916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1396.875, "completions/mean_terminated_length": 1335.0, "completions/min_length": 1138.0, "completions/min_terminated_length": 1138.0, "epoch": 0.7195439087817563, "frac_reward_zero_std": 0.0, "grad_norm": 2.820135428010118, "kl": 0.0162506103515625, "learning_rate": 2.9922643529727807e-07, "loss": 0.0059, "num_tokens": 157396096.0, "reward": 0.0, "reward_std": 0.8812559247016907, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01700246176086303, "rewards/wordcountpos_reward/raw_geo/std": 0.04058912543303214, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12171612389003693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1153.5, "completions/mean_terminated_length": 1130.4000244140625, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.719743948789758, "frac_reward_zero_std": 0.0, "grad_norm": 3.5140283134912607, "kl": 0.0194091796875, "learning_rate": 2.989655831581036e-07, "loss": 0.0379, "num_tokens": 157438424.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5473682880401611, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0035399011980190376, "rewards/wordcountpos_reward/raw_geo/std": 0.018009043658732828, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14700718047466632, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1369.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1042.125, "completions/mean_terminated_length": 1042.125, "completions/min_length": 778.0, "completions/min_terminated_length": 778.0, "epoch": 0.7199439887977596, "frac_reward_zero_std": 0.0, "grad_norm": 3.2473982700299824, "kl": 0.017669677734375, "learning_rate": 2.9870485342445083e-07, "loss": -0.0292, "num_tokens": 157478602.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9150305986404419, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.011284780901860051, "rewards/wordcountpos_reward/raw_geo/std": 0.09105076024721413, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965647, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1278.625, "completions/mean_terminated_length": 1178.0, "completions/min_length": 1075.0, "completions/min_terminated_length": 1075.0, "epoch": 0.7201440288057611, "frac_reward_zero_std": 0.0, "grad_norm": 2.5472222426422104, "kl": 0.0120849609375, "learning_rate": 2.9844424622345255e-07, "loss": 0.0076, "num_tokens": 157532972.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7631452679634094, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11972638865479257, "rewards/wordcountpos_reward/raw_geo/std": 0.2908345064995025, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1191.0625, "completions/mean_terminated_length": 1119.769287109375, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.7203440688137628, "frac_reward_zero_std": 0.0, "grad_norm": 3.142098203899145, "kl": 0.0151824951171875, "learning_rate": 2.9818376168218204e-07, "loss": 0.0352, "num_tokens": 157585917.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9335024356842041, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04075657771004921, "rewards/wordcountpos_reward/raw_geo/std": 0.1330037915943761, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.14291929864761416, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1110.5, "completions/mean_terminated_length": 1110.5, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.7205441088217643, "frac_reward_zero_std": 0.0, "grad_norm": 3.0409717361352575, "kl": 0.016204833984375, "learning_rate": 2.9792339992765286e-07, "loss": -0.0189, "num_tokens": 157631621.0, "reward": 0.0, "reward_std": 0.8714605569839478, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06836805969233739, "rewards/wordcountpos_reward/raw_geo/std": 0.08333199234603025, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387148, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1142.5625, "completions/mean_terminated_length": 1142.5625, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.720744148829766, "frac_reward_zero_std": 0.0, "grad_norm": 3.1782305361782344, "kl": 0.0120086669921875, "learning_rate": 2.9766316108681867e-07, "loss": -0.0367, "num_tokens": 157669102.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8551006317138672, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07793322343311938, "rewards/wordcountpos_reward/raw_geo/std": 0.061799244760134285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262934, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1119.6875, "completions/mean_terminated_length": 1119.6875, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.7209441888377676, "frac_reward_zero_std": 0.0, "grad_norm": 2.977235784694899, "kl": 0.0138397216796875, "learning_rate": 2.9740304528657267e-07, "loss": 0.0363, "num_tokens": 157711241.0, "reward": 2.2351741790771484e-08, "reward_std": 0.9182931184768677, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19054212242023197, "rewards/wordcountpos_reward/raw_geo/std": 0.13740328309886543, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466156, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 980.5625, "completions/mean_terminated_length": 980.5625, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.7211442288457691, "frac_reward_zero_std": 0.0, "grad_norm": 3.1800055487858163, "kl": 0.01751708984375, "learning_rate": 2.9714305265374906e-07, "loss": -0.0115, "num_tokens": 157754722.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7964819669723511, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016143675966970288, "rewards/wordcountpos_reward/raw_geo/std": 0.03797934333611082, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1173.75, "completions/mean_terminated_length": 1152.0, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.7213442688537708, "frac_reward_zero_std": 0.0, "grad_norm": 3.4023094730426346, "kl": 0.0172119140625, "learning_rate": 2.968831833151211e-07, "loss": -0.0485, "num_tokens": 157805286.0, "reward": -3.725290298461914e-09, "reward_std": 1.0621042251586914, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.22545146200433558, "rewards/wordcountpos_reward/raw_geo/std": 0.10699706559862213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1380.625, "completions/mean_terminated_length": 1326.3636474609375, "completions/min_length": 1128.0, "completions/min_terminated_length": 1128.0, "epoch": 0.7215443088617723, "frac_reward_zero_std": 0.0, "grad_norm": 2.503437502534177, "kl": 0.0150146484375, "learning_rate": 2.966234373974025e-07, "loss": 0.0469, "num_tokens": 157852312.0, "reward": 0.0, "reward_std": 0.414530485868454, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17565093454192035, "rewards/wordcountpos_reward/raw_geo/std": 0.2612806099978299, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1114.0, "completions/max_terminated_length": 1114.0, "completions/mean_length": 958.5625, "completions/mean_terminated_length": 958.5625, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.7217443488697739, "frac_reward_zero_std": 0.0, "grad_norm": 3.5311010811234005, "kl": 0.017547607421875, "learning_rate": 2.9636381502724594e-07, "loss": -0.0203, "num_tokens": 157895081.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7012832164764404, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06426957850745202, "rewards/wordcountpos_reward/raw_geo/std": 0.08485216736821695, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1253.9375, "completions/mean_terminated_length": 1197.1539306640625, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.7219443888777756, "frac_reward_zero_std": 0.0, "grad_norm": 2.736295961931833, "kl": 0.0148773193359375, "learning_rate": 2.961043163312453e-07, "loss": -0.0058, "num_tokens": 157939232.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5704208612442017, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.041870359076212005, "rewards/wordcountpos_reward/raw_geo/std": 0.20121820514016436, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1049.9375, "completions/mean_terminated_length": 1049.9375, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.7221444288857771, "frac_reward_zero_std": 0.0, "grad_norm": 3.2402123100723514, "kl": 0.024017333984375, "learning_rate": 2.958449414359328e-07, "loss": -0.0051, "num_tokens": 157983239.0, "reward": 0.0, "reward_std": 0.6806356906890869, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10024390652918841, "rewards/wordcountpos_reward/raw_geo/std": 0.15363802238276367, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 1030.6875, "completions/mean_terminated_length": 1030.6875, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.7223444688937788, "frac_reward_zero_std": 0.0, "grad_norm": 4.306738534075356, "kl": 0.05914306640625, "learning_rate": 2.9558569046778093e-07, "loss": 0.027, "num_tokens": 158033066.0, "reward": 0.0, "reward_std": 0.6993340253829956, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.017367213978880407, "rewards/wordcountpos_reward/raw_geo/std": 0.20766007848262985, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1127.1875, "completions/mean_terminated_length": 1127.1875, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.7225445089017803, "frac_reward_zero_std": 0.0, "grad_norm": 3.5870201534623125, "kl": 0.0175323486328125, "learning_rate": 2.9532656355320173e-07, "loss": 0.0095, "num_tokens": 158063981.0, "reward": 0.0, "reward_std": 0.5553433299064636, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.19633346373934843, "rewards/wordcountpos_reward/raw_geo/std": 0.09998920398708706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1115.6875, "completions/mean_terminated_length": 1115.6875, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.7227445489097819, "frac_reward_zero_std": 0.0, "grad_norm": 3.1293444547936096, "kl": 0.014739990234375, "learning_rate": 2.9506756081854664e-07, "loss": 0.0385, "num_tokens": 158094544.0, "reward": 0.0, "reward_std": 0.6668806076049805, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0009295628195414092, "rewards/wordcountpos_reward/raw_geo/std": 0.0625218138569417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1289.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 1113.875, "completions/mean_terminated_length": 1113.875, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.7229445889177836, "frac_reward_zero_std": 0.0, "grad_norm": 2.9722856502511887, "kl": 0.01432037353515625, "learning_rate": 2.948086823901064e-07, "loss": 0.0072, "num_tokens": 158139494.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4724990427494049, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.053255909861687115, "rewards/wordcountpos_reward/raw_geo/std": 0.16989842023403445, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1538999193800477, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1286.125, "completions/mean_terminated_length": 1188.9091796875, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.7231446289257851, "frac_reward_zero_std": 0.0, "grad_norm": 3.071506649157216, "kl": 0.0198974609375, "learning_rate": 2.945499283941114e-07, "loss": -0.0486, "num_tokens": 158187784.0, "reward": 0.0, "reward_std": 0.895163357257843, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13426118410645782, "rewards/wordcountpos_reward/raw_geo/std": 0.26728549848522004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1318.4375, "completions/mean_terminated_length": 1209.5, "completions/min_length": 989.0, "completions/min_terminated_length": 989.0, "epoch": 0.7233446689337868, "frac_reward_zero_std": 0.0, "grad_norm": 2.9722588016871185, "kl": 0.01263427734375, "learning_rate": 2.942912989567313e-07, "loss": -0.0379, "num_tokens": 158233303.0, "reward": 0.0, "reward_std": 0.5351900458335876, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.026566541308330707, "rewards/wordcountpos_reward/raw_geo/std": 0.25205994665572695, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1097.8125, "completions/mean_terminated_length": 1097.8125, "completions/min_length": 648.0, "completions/min_terminated_length": 648.0, "epoch": 0.7235447089417884, "frac_reward_zero_std": 0.0, "grad_norm": 3.4229086969626255, "kl": 0.017578125, "learning_rate": 2.940327942040752e-07, "loss": 0.0104, "num_tokens": 158280772.0, "reward": -2.9802322387695312e-08, "reward_std": 0.2846035361289978, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18369227562221382, "rewards/wordcountpos_reward/raw_geo/std": 0.3001019589282771, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11409872268574492, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1306.625, "completions/mean_terminated_length": 1218.727294921875, "completions/min_length": 1123.0, "completions/min_terminated_length": 1123.0, "epoch": 0.7237447489497899, "frac_reward_zero_std": 0.0, "grad_norm": 2.3974225154602355, "kl": 0.00978851318359375, "learning_rate": 2.937744142621907e-07, "loss": -0.0127, "num_tokens": 158336414.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7121055126190186, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09237462364311073, "rewards/wordcountpos_reward/raw_geo/std": 0.0725480724676715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820632, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1139.25, "completions/mean_terminated_length": 1115.2000732421875, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.7239447889577916, "frac_reward_zero_std": 0.0, "grad_norm": 3.4560762532933906, "kl": 0.0184478759765625, "learning_rate": 2.9351615925706543e-07, "loss": 0.0117, "num_tokens": 158380898.0, "reward": 0.0, "reward_std": 0.4301000237464905, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.022372313491799856, "rewards/wordcountpos_reward/raw_geo/std": 0.09501543952484257, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.14851112939963645, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1136.6875, "completions/mean_terminated_length": 1015.5833740234375, "completions/min_length": 310.0, "completions/min_terminated_length": 310.0, "epoch": 0.7241448289657931, "frac_reward_zero_std": 0.0, "grad_norm": 3.018166178657845, "kl": 0.015869140625, "learning_rate": 2.9325802931462543e-07, "loss": 0.0862, "num_tokens": 158430261.0, "reward": 0.0, "reward_std": 0.7246705293655396, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18094052213062695, "rewards/wordcountpos_reward/raw_geo/std": 0.13872201317435523, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1173.8125, "completions/mean_terminated_length": 1173.8125, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.7243448689737948, "frac_reward_zero_std": 0.0, "grad_norm": 2.6005864661214595, "kl": 0.01364898681640625, "learning_rate": 2.9300002456073647e-07, "loss": 0.0421, "num_tokens": 158471338.0, "reward": 1.30385160446167e-08, "reward_std": 0.8150008916854858, "rewards/wordcountpos_reward/mean": 1.30385160446167e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15948319028011107, "rewards/wordcountpos_reward/raw_geo/std": 0.13322232135829334, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1189.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 1022.125, "completions/mean_terminated_length": 1022.125, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.7245449089817964, "frac_reward_zero_std": 0.0, "grad_norm": 4.142700657177093, "kl": 0.021575927734375, "learning_rate": 2.9274214512120233e-07, "loss": -0.0039, "num_tokens": 158514788.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8011192083358765, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.036010923516968274, "rewards/wordcountpos_reward/raw_geo/std": 0.041170899977648626, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505421, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1302.5, "completions/mean_terminated_length": 1236.666748046875, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.7247449489897979, "frac_reward_zero_std": 0.0, "grad_norm": 3.198201440277619, "kl": 0.021331787109375, "learning_rate": 2.9248439112176636e-07, "loss": 0.0193, "num_tokens": 158567500.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0196713209152222, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09294262784927029, "rewards/wordcountpos_reward/raw_geo/std": 0.164408018169255, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1295.0, "completions/max_terminated_length": 1295.0, "completions/mean_length": 1052.5625, "completions/mean_terminated_length": 1052.5625, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.7249449889977996, "frac_reward_zero_std": 0.0, "grad_norm": 3.4435081675742647, "kl": 0.018035888671875, "learning_rate": 2.9222676268811074e-07, "loss": -0.0072, "num_tokens": 158608973.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8645801544189453, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08845548477769576, "rewards/wordcountpos_reward/raw_geo/std": 0.2388600853842033, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10602235962635778, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1362.0, "completions/mean_terminated_length": 1299.272705078125, "completions/min_length": 1086.0, "completions/min_terminated_length": 1086.0, "epoch": 0.7251450290058011, "frac_reward_zero_std": 0.0, "grad_norm": 2.8793513293600603, "kl": 0.021148681640625, "learning_rate": 2.919692599458562e-07, "loss": 0.0331, "num_tokens": 158663477.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8843902349472046, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0384939718013234, "rewards/wordcountpos_reward/raw_geo/std": 0.15536832435343662, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1234.0, "completions/max_terminated_length": 1234.0, "completions/mean_length": 1027.375, "completions/mean_terminated_length": 1027.375, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.7253450690138028, "frac_reward_zero_std": 0.0, "grad_norm": 3.8668733609521344, "kl": 0.0220947265625, "learning_rate": 2.9171188302056205e-07, "loss": -0.0102, "num_tokens": 158710675.0, "reward": 0.0, "reward_std": 1.0533368587493896, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08698213539293999, "rewards/wordcountpos_reward/raw_geo/std": 0.05825420826759232, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 981.6875, "completions/mean_terminated_length": 981.6875, "completions/min_length": 772.0, "completions/min_terminated_length": 772.0, "epoch": 0.7255451090218044, "frac_reward_zero_std": 0.0, "grad_norm": 3.3678861691674116, "kl": 0.0155487060546875, "learning_rate": 2.914546320377268e-07, "loss": 0.0348, "num_tokens": 158745150.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7450853586196899, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03038584793047503, "rewards/wordcountpos_reward/raw_geo/std": 0.15119237679849215, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970784, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1235.5625, "completions/mean_terminated_length": 1115.3636474609375, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.7257451490298059, "frac_reward_zero_std": 0.0, "grad_norm": 2.9329891398698784, "kl": 0.0136566162109375, "learning_rate": 2.911975071227868e-07, "loss": 0.0058, "num_tokens": 158801927.0, "reward": 4.470348358154297e-08, "reward_std": 0.810945451259613, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08549178265628725, "rewards/wordcountpos_reward/raw_geo/std": 0.1711973814133769, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717428, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1223.875, "completions/mean_terminated_length": 1160.1539306640625, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.7259451890378076, "frac_reward_zero_std": 0.0, "grad_norm": 2.988654600456266, "kl": 0.016448974609375, "learning_rate": 2.909405084011177e-07, "loss": -0.028, "num_tokens": 158846933.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4215250313282013, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15211092627003125, "rewards/wordcountpos_reward/raw_geo/std": 0.08266693851503251, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.19398358082484618, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1104.6875, "completions/mean_terminated_length": 1104.6875, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.7261452290458091, "frac_reward_zero_std": 0.0, "grad_norm": 3.31749172380097, "kl": 0.01861572265625, "learning_rate": 2.906836359980328e-07, "loss": -0.027, "num_tokens": 158893632.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9158291816711426, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08403684359027495, "rewards/wordcountpos_reward/raw_geo/std": 0.19112490102935406, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1212.8125, "completions/mean_terminated_length": 1117.0833740234375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.7263452690538108, "frac_reward_zero_std": 0.0, "grad_norm": 2.6587692447899833, "kl": 0.01751708984375, "learning_rate": 2.9042689003878483e-07, "loss": -0.1679, "num_tokens": 158937245.0, "reward": 0.0, "reward_std": 0.43431341648101807, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06455844845147032, "rewards/wordcountpos_reward/raw_geo/std": 0.17543110880413396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.18196458751941574, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1112.125, "completions/mean_terminated_length": 1112.125, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.7265453090618124, "frac_reward_zero_std": 0.0, "grad_norm": 3.498023599498352, "kl": 0.019317626953125, "learning_rate": 2.901702706485638e-07, "loss": 0.0114, "num_tokens": 158987383.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8752995133399963, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2874920781195012, "rewards/wordcountpos_reward/raw_geo/std": 0.13699604341700886, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 983.6875, "completions/mean_terminated_length": 983.6875, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.7267453490698139, "frac_reward_zero_std": 0.0, "grad_norm": 2.532729361743104, "kl": 0.017120361328125, "learning_rate": 2.8991377795249874e-07, "loss": 0.0074, "num_tokens": 159027074.0, "reward": 0.0, "reward_std": 0.8489640951156616, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15459452549705913, "rewards/wordcountpos_reward/raw_geo/std": 0.134687345177666, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1458.875, "completions/mean_terminated_length": 1368.4000244140625, "completions/min_length": 1226.0, "completions/min_terminated_length": 1226.0, "epoch": 0.7269453890778156, "frac_reward_zero_std": 0.0, "grad_norm": 1.934242087996957, "kl": 0.00650787353515625, "learning_rate": 2.8965741207565645e-07, "loss": -0.0093, "num_tokens": 159077200.0, "reward": 0.0, "reward_std": 1.0388479232788086, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1654081665583404, "rewards/wordcountpos_reward/raw_geo/std": 0.17963305189607015, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 956.3125, "completions/mean_terminated_length": 956.3125, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.7271454290858171, "frac_reward_zero_std": 0.0, "grad_norm": 2.723701831280841, "kl": 0.01148223876953125, "learning_rate": 2.894011731430425e-07, "loss": -0.0735, "num_tokens": 159110061.0, "reward": 0.0, "reward_std": 0.8851425647735596, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08852470277011194, "rewards/wordcountpos_reward/raw_geo/std": 0.10350288669703944, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1456.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1273.25, "completions/mean_terminated_length": 1273.25, "completions/min_length": 1031.0, "completions/min_terminated_length": 1031.0, "epoch": 0.7273454690938188, "frac_reward_zero_std": 0.0, "grad_norm": 3.1621194643693817, "kl": 0.020233154296875, "learning_rate": 2.891450612795996e-07, "loss": -0.0112, "num_tokens": 159157977.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7434735894203186, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18139368731322736, "rewards/wordcountpos_reward/raw_geo/std": 0.18980440285393488, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1023.5, "completions/mean_terminated_length": 1023.5, "completions/min_length": 735.0, "completions/min_terminated_length": 735.0, "epoch": 0.7275455091018204, "frac_reward_zero_std": 0.0, "grad_norm": 3.4066041494670585, "kl": 0.018035888671875, "learning_rate": 2.8888907661020944e-07, "loss": 0.0055, "num_tokens": 159199569.0, "reward": 0.0, "reward_std": 0.2933778166770935, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05693280084802807, "rewards/wordcountpos_reward/raw_geo/std": 0.3795487479059352, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1268.6875, "completions/mean_terminated_length": 1129.9000244140625, "completions/min_length": 989.0, "completions/min_terminated_length": 989.0, "epoch": 0.7277455491098219, "frac_reward_zero_std": 0.0, "grad_norm": 3.42958396920161, "kl": 0.016998291015625, "learning_rate": 2.886332192596912e-07, "loss": 0.0106, "num_tokens": 159235380.0, "reward": 0.0, "reward_std": 0.7510190010070801, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08283268253305798, "rewards/wordcountpos_reward/raw_geo/std": 0.07494929124575442, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1192.4375, "completions/mean_terminated_length": 1171.933349609375, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.7279455891178236, "frac_reward_zero_std": 0.0, "grad_norm": 3.106821800509029, "kl": 0.0155487060546875, "learning_rate": 2.8837748935280213e-07, "loss": -0.0131, "num_tokens": 159279475.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7420637607574463, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08013599939564696, "rewards/wordcountpos_reward/raw_geo/std": 0.05928295555989302, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1200.5625, "completions/mean_terminated_length": 1131.4615478515625, "completions/min_length": 784.0, "completions/min_terminated_length": 784.0, "epoch": 0.7281456291258251, "frac_reward_zero_std": 0.0, "grad_norm": 3.272404746289429, "kl": 0.015625, "learning_rate": 2.881218870142372e-07, "loss": -0.0284, "num_tokens": 159334132.0, "reward": 0.0, "reward_std": 0.8613318800926208, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08469976975513503, "rewards/wordcountpos_reward/raw_geo/std": 0.14697136736248245, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1208.1875, "completions/mean_terminated_length": 1140.84619140625, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.7283456691338268, "frac_reward_zero_std": 0.0, "grad_norm": 3.163701859200711, "kl": 0.0177001953125, "learning_rate": 2.8786641236862927e-07, "loss": 0.0332, "num_tokens": 159387007.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8973047137260437, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0862366366592071, "rewards/wordcountpos_reward/raw_geo/std": 0.11261175976253313, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1039.125, "completions/mean_terminated_length": 1039.125, "completions/min_length": 519.0, "completions/min_terminated_length": 519.0, "epoch": 0.7285457091418284, "frac_reward_zero_std": 0.0, "grad_norm": 3.320087259646832, "kl": 0.0148162841796875, "learning_rate": 2.876110655405491e-07, "loss": -0.0281, "num_tokens": 159425201.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9786785840988159, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3873958314888099, "rewards/wordcountpos_reward/raw_geo/std": 0.3338432512654905, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1167.25, "completions/mean_terminated_length": 1119.71435546875, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.7287457491498299, "frac_reward_zero_std": 0.0, "grad_norm": 3.2848682916170224, "kl": 0.0213623046875, "learning_rate": 2.8735584665450497e-07, "loss": -0.0556, "num_tokens": 159459413.0, "reward": -2.9802322387695312e-08, "reward_std": 0.905066967010498, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03576637520087171, "rewards/wordcountpos_reward/raw_geo/std": 0.207039235047417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1189.5625, "completions/mean_terminated_length": 1145.21435546875, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.7289457891578316, "frac_reward_zero_std": 0.0, "grad_norm": 3.413804427226355, "kl": 0.021820068359375, "learning_rate": 2.871007558349424e-07, "loss": -0.0008, "num_tokens": 159505294.0, "reward": 0.0, "reward_std": 0.72329181432724, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10829929688449913, "rewards/wordcountpos_reward/raw_geo/std": 0.15456818182127133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1276569477008451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1318.625, "completions/mean_terminated_length": 1236.181884765625, "completions/min_length": 1061.0, "completions/min_terminated_length": 1061.0, "epoch": 0.7291458291658331, "frac_reward_zero_std": 0.0, "grad_norm": 2.6671473558868417, "kl": 0.0168304443359375, "learning_rate": 2.8684579320624515e-07, "loss": -0.0224, "num_tokens": 159558424.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8686923980712891, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09634649415975143, "rewards/wordcountpos_reward/raw_geo/std": 0.09147984836514829, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1070.3125, "completions/mean_terminated_length": 1070.3125, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.7293458691738348, "frac_reward_zero_std": 0.0, "grad_norm": 2.1952440226500336, "kl": 0.00937652587890625, "learning_rate": 2.8659095889273425e-07, "loss": 0.0074, "num_tokens": 159600837.0, "reward": -1.862645149230957e-08, "reward_std": 1.0356948375701904, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02313695326807666, "rewards/wordcountpos_reward/raw_geo/std": 0.06296369800902021, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1019.4375, "completions/mean_terminated_length": 1019.4375, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.7295459091818364, "frac_reward_zero_std": 0.0, "grad_norm": 3.448160546570806, "kl": 0.01831817626953125, "learning_rate": 2.863362530186678e-07, "loss": -0.0329, "num_tokens": 159640780.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9681504368782043, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013519833407996437, "rewards/wordcountpos_reward/raw_geo/std": 0.07995051110701974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1413558682244267, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1209.375, "completions/mean_terminated_length": 1167.857177734375, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.7297459491898379, "frac_reward_zero_std": 0.0, "grad_norm": 3.384843781258482, "kl": 0.017547607421875, "learning_rate": 2.860816757082416e-07, "loss": -0.0252, "num_tokens": 159675450.0, "reward": 0.0, "reward_std": 0.6838712692260742, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04721507661648595, "rewards/wordcountpos_reward/raw_geo/std": 0.09763827401559241, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1148.5, "completions/mean_terminated_length": 1125.0667724609375, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.7299459891978396, "frac_reward_zero_std": 0.0, "grad_norm": 3.4749681486619353, "kl": 0.019622802734375, "learning_rate": 2.858272270855887e-07, "loss": 0.0108, "num_tokens": 159722674.0, "reward": -4.470348358154297e-08, "reward_std": 0.9583985805511475, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09462553198152872, "rewards/wordcountpos_reward/raw_geo/std": 0.15280842286609023, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1051.0, "completions/max_terminated_length": 1051.0, "completions/mean_length": 845.5, "completions/mean_terminated_length": 845.5, "completions/min_length": 677.0, "completions/min_terminated_length": 677.0, "epoch": 0.7301460292058412, "frac_reward_zero_std": 0.0, "grad_norm": 3.135337682210577, "kl": 0.0128936767578125, "learning_rate": 2.8557290727477965e-07, "loss": 0.0214, "num_tokens": 159752578.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0678656101226807, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17042130459369037, "rewards/wordcountpos_reward/raw_geo/std": 0.16999304109442745, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1062.8125, "completions/mean_terminated_length": 1062.8125, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.7303460692138428, "frac_reward_zero_std": 0.0, "grad_norm": 3.5525696023191498, "kl": 0.01910400390625, "learning_rate": 2.8531871639982164e-07, "loss": -0.0124, "num_tokens": 159789399.0, "reward": -7.450580596923828e-09, "reward_std": 1.0438778400421143, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07443481537432661, "rewards/wordcountpos_reward/raw_geo/std": 0.06308296570297378, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767716, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1498.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1155.25, "completions/mean_terminated_length": 1155.25, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.7305461092218444, "frac_reward_zero_std": 0.0, "grad_norm": 3.0266618509321908, "kl": 0.0147552490234375, "learning_rate": 2.850646545846594e-07, "loss": 0.0605, "num_tokens": 159841059.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5570095181465149, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0011487247450269033, "rewards/wordcountpos_reward/raw_geo/std": 0.12583784386813665, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1241.3125, "completions/mean_terminated_length": 1204.357177734375, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.7307461492298459, "frac_reward_zero_std": 0.0, "grad_norm": 2.9815841987090814, "kl": 0.0136871337890625, "learning_rate": 2.848107219531746e-07, "loss": 0.0361, "num_tokens": 159896584.0, "reward": 0.0, "reward_std": 0.9241366386413574, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03409948241719651, "rewards/wordcountpos_reward/raw_geo/std": 0.13839367786066162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787746, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1333.4375, "completions/mean_terminated_length": 1277.916748046875, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.7309461892378476, "frac_reward_zero_std": 0.0, "grad_norm": 2.6374040188293533, "kl": 0.013092041015625, "learning_rate": 2.845569186291862e-07, "loss": 0.0142, "num_tokens": 159948271.0, "reward": 0.0, "reward_std": 0.550606906414032, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.24295049842882843, "rewards/wordcountpos_reward/raw_geo/std": 0.10546618261342829, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1037.875, "completions/mean_terminated_length": 1037.875, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.7311462292458492, "frac_reward_zero_std": 0.0, "grad_norm": 3.9255879565973943, "kl": 0.020111083984375, "learning_rate": 2.843032447364492e-07, "loss": 0.0277, "num_tokens": 159987661.0, "reward": 0.0, "reward_std": 1.0420470237731934, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.036370188136021386, "rewards/wordcountpos_reward/raw_geo/std": 0.07716243110582975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1251.0, "completions/mean_terminated_length": 1215.4285888671875, "completions/min_length": 1073.0, "completions/min_terminated_length": 1073.0, "epoch": 0.7313462692538508, "frac_reward_zero_std": 0.0, "grad_norm": 3.0137275051004546, "kl": 0.0178375244140625, "learning_rate": 2.8404970039865693e-07, "loss": -0.0065, "num_tokens": 160039677.0, "reward": 0.0, "reward_std": 0.802413284778595, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12473945454233815, "rewards/wordcountpos_reward/raw_geo/std": 0.18002230177526524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457553, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1104.625, "completions/mean_terminated_length": 1048.1429443359375, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.7315463092618524, "frac_reward_zero_std": 0.0, "grad_norm": 3.3062772458683876, "kl": 0.0159454345703125, "learning_rate": 2.837962857394383e-07, "loss": -0.004, "num_tokens": 160079151.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0516231060028076, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05998185484208482, "rewards/wordcountpos_reward/raw_geo/std": 0.1735156584074931, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1170.75, "completions/mean_terminated_length": 1148.800048828125, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.7317463492698539, "frac_reward_zero_std": 0.0, "grad_norm": 2.8616517364144385, "kl": 0.01666259765625, "learning_rate": 2.835430008823595e-07, "loss": 0.0168, "num_tokens": 160125051.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9042466878890991, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13725701303936633, "rewards/wordcountpos_reward/raw_geo/std": 0.10198147670632633, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1238.75, "completions/mean_terminated_length": 1221.3333740234375, "completions/min_length": 1113.0, "completions/min_terminated_length": 1113.0, "epoch": 0.7319463892778556, "frac_reward_zero_std": 0.0, "grad_norm": 2.255381782885124, "kl": 0.010740280151367188, "learning_rate": 2.832898459509234e-07, "loss": 0.0022, "num_tokens": 160168335.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9581156969070435, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1638729450941483, "rewards/wordcountpos_reward/raw_geo/std": 0.07621256932546842, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 995.6875, "completions/mean_terminated_length": 995.6875, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.7321464292858572, "frac_reward_zero_std": 0.0, "grad_norm": 2.3783109093519244, "kl": 0.01416015625, "learning_rate": 2.830368210685697e-07, "loss": 0.019, "num_tokens": 160199426.0, "reward": 0.0, "reward_std": 0.5689475536346436, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10730701937403322, "rewards/wordcountpos_reward/raw_geo/std": 0.11864436210438137, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1039.5, "completions/mean_terminated_length": 1039.5, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.7323464692938588, "frac_reward_zero_std": 0.0, "grad_norm": 3.0981305689646805, "kl": 0.014739990234375, "learning_rate": 2.827839263586741e-07, "loss": 0.0076, "num_tokens": 160244778.0, "reward": 0.0, "reward_std": 0.7813483476638794, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.015555983165212731, "rewards/wordcountpos_reward/raw_geo/std": 0.1150415035929419, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1344398529978149, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1186.75, "completions/mean_terminated_length": 1186.75, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.7325465093018604, "frac_reward_zero_std": 0.0, "grad_norm": 2.513406664453977, "kl": 0.01336669921875, "learning_rate": 2.8253116194454953e-07, "loss": -0.0325, "num_tokens": 160284574.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9090782403945923, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07677313184122919, "rewards/wordcountpos_reward/raw_geo/std": 0.07966437914823885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1259.9375, "completions/mean_terminated_length": 1204.5384521484375, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.7327465493098619, "frac_reward_zero_std": 0.0, "grad_norm": 2.260418593964924, "kl": 0.011932373046875, "learning_rate": 2.8227852794944486e-07, "loss": -0.0321, "num_tokens": 160331069.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0250600576400757, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010356899104718258, "rewards/wordcountpos_reward/raw_geo/std": 0.08804426849610035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1211.0, "completions/max_terminated_length": 1211.0, "completions/mean_length": 997.25, "completions/mean_terminated_length": 997.25, "completions/min_length": 658.0, "completions/min_terminated_length": 658.0, "epoch": 0.7329465893178636, "frac_reward_zero_std": 0.0, "grad_norm": 3.1640095341562606, "kl": 0.018218994140625, "learning_rate": 2.8202602449654577e-07, "loss": -0.0205, "num_tokens": 160375417.0, "reward": 0.0, "reward_std": 0.9783482551574707, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2285579833386068, "rewards/wordcountpos_reward/raw_geo/std": 0.10609725254844718, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1313.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1073.6875, "completions/mean_terminated_length": 1073.6875, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.7331466293258652, "frac_reward_zero_std": 0.0, "grad_norm": 3.489966869953515, "kl": 0.020355224609375, "learning_rate": 2.8177365170897396e-07, "loss": -0.0109, "num_tokens": 160422132.0, "reward": 0.0, "reward_std": 1.0080006122589111, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3329132722742995, "rewards/wordcountpos_reward/raw_geo/std": 0.06874747077103864, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1072.375, "completions/mean_terminated_length": 1072.375, "completions/min_length": 659.0, "completions/min_terminated_length": 659.0, "epoch": 0.7333466693338667, "frac_reward_zero_std": 0.0, "grad_norm": 3.533929592789459, "kl": 0.0159912109375, "learning_rate": 2.8152140970978743e-07, "loss": -0.0685, "num_tokens": 160459426.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9754714965820312, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12270735022985399, "rewards/wordcountpos_reward/raw_geo/std": 0.12561376764942636, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1373.25, "completions/mean_terminated_length": 1274.6666259765625, "completions/min_length": 1046.0, "completions/min_terminated_length": 1046.0, "epoch": 0.7335467093418684, "frac_reward_zero_std": 0.0, "grad_norm": 2.634966740889278, "kl": 0.0166778564453125, "learning_rate": 2.812692986219807e-07, "loss": -0.0402, "num_tokens": 160513758.0, "reward": -7.450580596923828e-09, "reward_std": 1.0254154205322266, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06255600632601592, "rewards/wordcountpos_reward/raw_geo/std": 0.07899124578864643, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.12292725943057184, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1137.0, "completions/mean_terminated_length": 1137.0, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.7337467493498699, "frac_reward_zero_std": 0.0, "grad_norm": 2.725719009710059, "kl": 0.01629638671875, "learning_rate": 2.8101731856848444e-07, "loss": 0.0064, "num_tokens": 160553366.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0667228698730469, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11689660842068028, "rewards/wordcountpos_reward/raw_geo/std": 0.08445546973821143, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.026874192494328493, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1329.3125, "completions/mean_terminated_length": 1158.625, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.7339467893578716, "frac_reward_zero_std": 0.0, "grad_norm": 3.305232631304118, "kl": 0.017578125, "learning_rate": 2.8076546967216487e-07, "loss": -0.0204, "num_tokens": 160602235.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9527249336242676, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.001691488191597751, "rewards/wordcountpos_reward/raw_geo/std": 0.09928202216747718, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1250.3125, "completions/mean_terminated_length": 1056.111083984375, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.7341468293658732, "frac_reward_zero_std": 0.0, "grad_norm": 3.1235179491061524, "kl": 0.0204010009765625, "learning_rate": 2.805137520558249e-07, "loss": -0.0753, "num_tokens": 160647768.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6078046560287476, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14478972351931185, "rewards/wordcountpos_reward/raw_geo/std": 0.30998217291016694, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477443, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1071.4375, "completions/mean_terminated_length": 1042.86669921875, "completions/min_length": 808.0, "completions/min_terminated_length": 808.0, "epoch": 0.7343468693738747, "frac_reward_zero_std": 0.0, "grad_norm": 2.9957832129033544, "kl": 0.02044677734375, "learning_rate": 2.8026216584220313e-07, "loss": -0.0192, "num_tokens": 160684735.0, "reward": 0.0, "reward_std": 0.7262300252914429, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15008399663876307, "rewards/wordcountpos_reward/raw_geo/std": 0.08448736881450425, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1356.75, "completions/mean_terminated_length": 1213.5, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.7345469093818764, "frac_reward_zero_std": 0.0, "grad_norm": 2.5692562893061917, "kl": 0.013702392578125, "learning_rate": 2.800107111539743e-07, "loss": 0.0113, "num_tokens": 160744875.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6697025895118713, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09694862240235935, "rewards/wordcountpos_reward/raw_geo/std": 0.13264653650051775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1299.875, "completions/mean_terminated_length": 1271.2857666015625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.734746949389878, "frac_reward_zero_std": 0.0, "grad_norm": 3.1263890306342983, "kl": 0.018218994140625, "learning_rate": 2.7975938811374855e-07, "loss": 0.0395, "num_tokens": 160791193.0, "reward": 0.0, "reward_std": 0.6177002191543579, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01108461572222672, "rewards/wordcountpos_reward/raw_geo/std": 0.09375119473281524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.24083189157584592, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1097.9375, "completions/mean_terminated_length": 1097.9375, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.7349469893978796, "frac_reward_zero_std": 0.0, "grad_norm": 2.676170398401686, "kl": 0.0151824951171875, "learning_rate": 2.7950819684407226e-07, "loss": -0.0163, "num_tokens": 160829608.0, "reward": 0.0, "reward_std": 0.556490421295166, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07913192853096525, "rewards/wordcountpos_reward/raw_geo/std": 0.1370461416334444, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1222.625, "completions/mean_terminated_length": 1204.1334228515625, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.7351470294058812, "frac_reward_zero_std": 0.0, "grad_norm": 2.423821691436435, "kl": 0.0201873779296875, "learning_rate": 2.792571374674275e-07, "loss": -0.021, "num_tokens": 160869922.0, "reward": 0.0, "reward_std": 0.8660597205162048, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.026569275227446173, "rewards/wordcountpos_reward/raw_geo/std": 0.06405563860481767, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1206.6875, "completions/mean_terminated_length": 1164.7857666015625, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.7353470694138827, "frac_reward_zero_std": 0.0, "grad_norm": 2.940458885791934, "kl": 0.01483154296875, "learning_rate": 2.790062101062321e-07, "loss": -0.0381, "num_tokens": 160912237.0, "reward": 0.0, "reward_std": 0.8683557510375977, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21217187014718347, "rewards/wordcountpos_reward/raw_geo/std": 0.20634359741498778, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282607, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1149.6875, "completions/mean_terminated_length": 1032.916748046875, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.7355471094218844, "frac_reward_zero_std": 0.0, "grad_norm": 2.2815051707684417, "kl": 0.011199951171875, "learning_rate": 2.7875541488283886e-07, "loss": -0.0326, "num_tokens": 160961136.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9564840197563171, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024297529207109578, "rewards/wordcountpos_reward/raw_geo/std": 0.15026341177358818, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901158, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1392.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1201.375, "completions/mean_terminated_length": 1201.375, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.735747149429886, "frac_reward_zero_std": 0.0, "grad_norm": 3.248190458512116, "kl": 0.0178375244140625, "learning_rate": 2.7850475191953737e-07, "loss": -0.0168, "num_tokens": 160999718.0, "reward": 5.21540641784668e-08, "reward_std": 1.0459411144256592, "rewards/wordcountpos_reward/mean": 5.21540641784668e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02164161342143494, "rewards/wordcountpos_reward/raw_geo/std": 0.053935736792338276, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1079.5, "completions/mean_terminated_length": 1079.5, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.7359471894378876, "frac_reward_zero_std": 0.0, "grad_norm": 2.8677358717421413, "kl": 0.0160675048828125, "learning_rate": 2.7825422133855145e-07, "loss": 0.0058, "num_tokens": 161033278.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8152828812599182, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08163297047088551, "rewards/wordcountpos_reward/raw_geo/std": 0.04910220161081795, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1224.625, "completions/mean_terminated_length": 1224.625, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.7361472294458892, "frac_reward_zero_std": 0.0, "grad_norm": 2.938375508634631, "kl": 0.0225372314453125, "learning_rate": 2.7800382326204126e-07, "loss": 0.0416, "num_tokens": 161085280.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6753498315811157, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.059507227940944096, "rewards/wordcountpos_reward/raw_geo/std": 0.061685708831158245, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1287403358472941, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1217.375, "completions/mean_terminated_length": 1217.375, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.7363472694538907, "frac_reward_zero_std": 0.0, "grad_norm": 3.4612906322942516, "kl": 0.021240234375, "learning_rate": 2.7775355781210183e-07, "loss": 0.0019, "num_tokens": 161130142.0, "reward": 0.0, "reward_std": 1.0277976989746094, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16110319002892526, "rewards/wordcountpos_reward/raw_geo/std": 0.1730037062462017, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1162.0625, "completions/mean_terminated_length": 1139.533447265625, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.7365473094618924, "frac_reward_zero_std": 0.0, "grad_norm": 3.5070164382507905, "kl": 0.02484130859375, "learning_rate": 2.775034251107642e-07, "loss": -0.0592, "num_tokens": 161182863.0, "reward": 0.0, "reward_std": 0.8524072170257568, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2283458978371925, "rewards/wordcountpos_reward/raw_geo/std": 0.294164408320108, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 1054.0625, "completions/mean_terminated_length": 1054.0625, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.736747349469894, "frac_reward_zero_std": 0.0, "grad_norm": 3.3169459303854816, "kl": 0.02044677734375, "learning_rate": 2.772534252799936e-07, "loss": -0.0018, "num_tokens": 161216504.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9571162462234497, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1090127027665608, "rewards/wordcountpos_reward/raw_geo/std": 0.04381635650506922, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1209.5, "completions/mean_terminated_length": 1168.0, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.7369473894778956, "frac_reward_zero_std": 0.0, "grad_norm": 3.5677247558876455, "kl": 0.020416259765625, "learning_rate": 2.770035584416913e-07, "loss": 0.0099, "num_tokens": 161261624.0, "reward": -2.9802322387695312e-08, "reward_std": 0.97107994556427, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04996618523480818, "rewards/wordcountpos_reward/raw_geo/std": 0.12896834354124972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1274.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 999.4375, "completions/mean_terminated_length": 999.4375, "completions/min_length": 797.0, "completions/min_terminated_length": 797.0, "epoch": 0.7371474294858972, "frac_reward_zero_std": 0.0, "grad_norm": 2.9590519448516384, "kl": 0.0137786865234375, "learning_rate": 2.7675382471769365e-07, "loss": 0.0054, "num_tokens": 161294919.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7118492126464844, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07389371058946304, "rewards/wordcountpos_reward/raw_geo/std": 0.10066793599381849, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1238.0, "completions/max_terminated_length": 1238.0, "completions/mean_length": 1064.3125, "completions/mean_terminated_length": 1064.3125, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.7373474694938987, "frac_reward_zero_std": 0.0, "grad_norm": 2.525710111166079, "kl": 0.014892578125, "learning_rate": 2.7650422422977197e-07, "loss": -0.0241, "num_tokens": 161334068.0, "reward": 0.0, "reward_std": 0.7200008630752563, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10615923351418236, "rewards/wordcountpos_reward/raw_geo/std": 0.07689027513668635, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1091.875, "completions/mean_terminated_length": 1064.666748046875, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.7375475095019004, "frac_reward_zero_std": 0.0, "grad_norm": 3.6044144702384626, "kl": 0.023834228515625, "learning_rate": 2.762547570996323e-07, "loss": -0.0358, "num_tokens": 161384314.0, "reward": 0.0, "reward_std": 0.8018282651901245, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.053166920588454236, "rewards/wordcountpos_reward/raw_geo/std": 0.1539060404172706, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0825518916489187, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 998.5625, "completions/mean_terminated_length": 998.5625, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.737747549509902, "frac_reward_zero_std": 0.0, "grad_norm": 3.412018184042999, "kl": 0.0153350830078125, "learning_rate": 2.760054234489162e-07, "loss": -0.0246, "num_tokens": 161423667.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9773026704788208, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02051344561739244, "rewards/wordcountpos_reward/raw_geo/std": 0.1353940988938194, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1258.0, "completions/max_terminated_length": 1258.0, "completions/mean_length": 1019.5, "completions/mean_terminated_length": 1019.5, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.7379475895179036, "frac_reward_zero_std": 0.0, "grad_norm": 3.051738079754195, "kl": 0.0133209228515625, "learning_rate": 2.757562233991997e-07, "loss": -0.023, "num_tokens": 161458315.0, "reward": 0.0, "reward_std": 0.6237664818763733, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02116955762095152, "rewards/wordcountpos_reward/raw_geo/std": 0.06338522815968177, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1080.625, "completions/mean_terminated_length": 1080.625, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.7381476295259052, "frac_reward_zero_std": 0.0, "grad_norm": 4.032193130656439, "kl": 0.0198974609375, "learning_rate": 2.755071570719942e-07, "loss": -0.0069, "num_tokens": 161505205.0, "reward": 0.0, "reward_std": 0.8497796058654785, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10155670355087162, "rewards/wordcountpos_reward/raw_geo/std": 0.05199203568152987, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1296.9375, "completions/mean_terminated_length": 1229.25, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.7383476695339067, "frac_reward_zero_std": 0.0, "grad_norm": 3.2075707822554644, "kl": 0.018402099609375, "learning_rate": 2.7525822458874524e-07, "loss": 0.0003, "num_tokens": 161558660.0, "reward": 0.0, "reward_std": 0.8284253478050232, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.00146166170221759, "rewards/wordcountpos_reward/raw_geo/std": 0.04921588192366201, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.14194417264596723, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1149.0, "completions/mean_terminated_length": 1149.0, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.7385477095419084, "frac_reward_zero_std": 0.0, "grad_norm": 3.1004873451788324, "kl": 0.01837158203125, "learning_rate": 2.750094260708335e-07, "loss": 0.0165, "num_tokens": 161607996.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9925130605697632, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.025513858949526354, "rewards/wordcountpos_reward/raw_geo/std": 0.05862658838827899, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1119.6875, "completions/mean_terminated_length": 1119.6875, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.73874774954991, "frac_reward_zero_std": 0.0, "grad_norm": 3.2296131632712886, "kl": 0.020355224609375, "learning_rate": 2.7476076163957426e-07, "loss": 0.0051, "num_tokens": 161661975.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9825427532196045, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.005894759256123401, "rewards/wordcountpos_reward/raw_geo/std": 0.009210839158973189, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 1095.1875, "completions/mean_terminated_length": 780.3333129882812, "completions/min_length": 603.0, "completions/min_terminated_length": 603.0, "epoch": 0.7389477895579116, "frac_reward_zero_std": 0.0, "grad_norm": 3.1361403782789017, "kl": 0.01580810546875, "learning_rate": 2.745122314162178e-07, "loss": -0.0439, "num_tokens": 161704866.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9741044044494629, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06313202604332613, "rewards/wordcountpos_reward/raw_geo/std": 0.10120415186858193, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505424, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1027.8125, "completions/mean_terminated_length": 1027.8125, "completions/min_length": 734.0, "completions/min_terminated_length": 734.0, "epoch": 0.7391478295659132, "frac_reward_zero_std": 0.0, "grad_norm": 3.63692067855408, "kl": 0.018463134765625, "learning_rate": 2.7426383552194775e-07, "loss": -0.0052, "num_tokens": 161755911.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8606092929840088, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.31542899018346887, "rewards/wordcountpos_reward/raw_geo/std": 0.41079829844616395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 952.625, "completions/mean_terminated_length": 952.625, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.7393478695739147, "frac_reward_zero_std": 0.0, "grad_norm": 3.5249460215501243, "kl": 0.0184783935546875, "learning_rate": 2.7401557407788404e-07, "loss": -0.0394, "num_tokens": 161787777.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5299421548843384, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22152558925042845, "rewards/wordcountpos_reward/raw_geo/std": 0.06655726723331543, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.029502040105226113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1098.1875, "completions/mean_terminated_length": 1098.1875, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.7395479095819164, "frac_reward_zero_std": 0.0, "grad_norm": 3.2116421671163424, "kl": 0.0191650390625, "learning_rate": 2.737674472050794e-07, "loss": -0.0014, "num_tokens": 161833236.0, "reward": -2.9802322387695312e-08, "reward_std": 0.70786452293396, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022586938258037648, "rewards/wordcountpos_reward/raw_geo/std": 0.11327562377096008, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1201.875, "completions/mean_terminated_length": 1159.2857666015625, "completions/min_length": 1014.0, "completions/min_terminated_length": 1014.0, "epoch": 0.739747949589918, "frac_reward_zero_std": 0.0, "grad_norm": 3.374792699988731, "kl": 0.018096923828125, "learning_rate": 2.735194550245221e-07, "loss": -0.0442, "num_tokens": 161879330.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8803964257240295, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16343465083885172, "rewards/wordcountpos_reward/raw_geo/std": 0.18438662482232987, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504183, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 1075.6875, "completions/mean_terminated_length": 1075.6875, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.7399479895979196, "frac_reward_zero_std": 0.0, "grad_norm": 2.380741781958444, "kl": 0.007171630859375, "learning_rate": 2.732715976571336e-07, "loss": -0.009, "num_tokens": 161919861.0, "reward": 0.0, "reward_std": 0.8789486289024353, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08214822158701032, "rewards/wordcountpos_reward/raw_geo/std": 0.12402641311903657, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12988598989256067, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1268.5625, "completions/mean_terminated_length": 1268.5625, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.7401480296059212, "frac_reward_zero_std": 0.0, "grad_norm": 2.6008473485686823, "kl": 0.0136260986328125, "learning_rate": 2.7302387522377104e-07, "loss": 0.0039, "num_tokens": 161965422.0, "reward": 1.4901161193847656e-08, "reward_std": 0.943639874458313, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09963865977224229, "rewards/wordcountpos_reward/raw_geo/std": 0.04663255362819394, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1279.75, "completions/mean_terminated_length": 1147.5999755859375, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.7403480696139227, "frac_reward_zero_std": 0.0, "grad_norm": 3.281425946011877, "kl": 0.02081298828125, "learning_rate": 2.727762878452246e-07, "loss": 0.0116, "num_tokens": 162023370.0, "reward": 0.0, "reward_std": 0.6920468807220459, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.025148970127448034, "rewards/wordcountpos_reward/raw_geo/std": 0.1428365871998083, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1069.75, "completions/mean_terminated_length": 1069.75, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.7405481096219244, "frac_reward_zero_std": 0.0, "grad_norm": 2.93286890343033, "kl": 0.016632080078125, "learning_rate": 2.725288356422187e-07, "loss": -0.0399, "num_tokens": 162064526.0, "reward": 0.0, "reward_std": 0.4631859064102173, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.21421246976350836, "rewards/wordcountpos_reward/raw_geo/std": 0.2830174839967298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1083.25, "completions/mean_terminated_length": 1083.25, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.740748149629926, "frac_reward_zero_std": 0.0, "grad_norm": 2.496573040737912, "kl": 0.013153076171875, "learning_rate": 2.7228151873541296e-07, "loss": -0.0212, "num_tokens": 162114074.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6897730231285095, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0029746301101447464, "rewards/wordcountpos_reward/raw_geo/std": 0.18257485783374286, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13924399049470285, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1343.5, "completions/mean_terminated_length": 1249.5999755859375, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.7409481896379276, "frac_reward_zero_std": 0.0, "grad_norm": 2.4741971779730734, "kl": 0.0150299072265625, "learning_rate": 2.7203433724539946e-07, "loss": -0.0872, "num_tokens": 162166098.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9753495454788208, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06979812456565117, "rewards/wordcountpos_reward/raw_geo/std": 0.08428994841046374, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639735, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1148.875, "completions/mean_terminated_length": 1148.875, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.7411482296459292, "frac_reward_zero_std": 0.0, "grad_norm": 3.1078799356483455, "kl": 0.0125274658203125, "learning_rate": 2.717872912927056e-07, "loss": -0.0227, "num_tokens": 162214968.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8831413984298706, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.017072180027607815, "rewards/wordcountpos_reward/raw_geo/std": 0.06154034613388157, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1146.75, "completions/mean_terminated_length": 1146.75, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.7413482696539307, "frac_reward_zero_std": 0.0, "grad_norm": 1.6465261894895098, "kl": 0.0048065185546875, "learning_rate": 2.7154038099779153e-07, "loss": 0.0377, "num_tokens": 162256844.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0127284526824951, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05414896048831405, "rewards/wordcountpos_reward/raw_geo/std": 0.09857013080493691, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1209.875, "completions/mean_terminated_length": 1168.4285888671875, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.7415483096619324, "frac_reward_zero_std": 0.0, "grad_norm": 2.984129090052008, "kl": 0.018646240234375, "learning_rate": 2.712936064810525e-07, "loss": -0.0774, "num_tokens": 162299482.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0204046964645386, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08849964597600257, "rewards/wordcountpos_reward/raw_geo/std": 0.07959368424328586, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 982.125, "completions/mean_terminated_length": 982.125, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.741748349669934, "frac_reward_zero_std": 0.0, "grad_norm": 3.1838527356640953, "kl": 0.01837158203125, "learning_rate": 2.7104696786281623e-07, "loss": 0.0118, "num_tokens": 162342988.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9868683815002441, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.040302219409376554, "rewards/wordcountpos_reward/raw_geo/std": 0.13548057206146677, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1052.8125, "completions/mean_terminated_length": 1052.8125, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.7419483896779356, "frac_reward_zero_std": 0.0, "grad_norm": 3.6371534703941215, "kl": 0.021148681640625, "learning_rate": 2.708004652633452e-07, "loss": -0.0188, "num_tokens": 162378361.0, "reward": 0.0, "reward_std": 0.7024555206298828, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09013063236571256, "rewards/wordcountpos_reward/raw_geo/std": 0.11828470805918505, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1292.875, "completions/mean_terminated_length": 1223.8333740234375, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.7421484296859372, "frac_reward_zero_std": 0.0, "grad_norm": 3.2580767778940856, "kl": 0.0218505859375, "learning_rate": 2.705540988028351e-07, "loss": -0.0251, "num_tokens": 162425863.0, "reward": -2.60770320892334e-08, "reward_std": 1.0284409523010254, "rewards/wordcountpos_reward/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07617536890427591, "rewards/wordcountpos_reward/raw_geo/std": 0.07578584497690864, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1166.9375, "completions/mean_terminated_length": 1119.357177734375, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.7423484696939388, "frac_reward_zero_std": 0.0, "grad_norm": 3.014199106422077, "kl": 0.0149993896484375, "learning_rate": 2.703078686014156e-07, "loss": -0.0705, "num_tokens": 162476406.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6461617350578308, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1113817005789169, "rewards/wordcountpos_reward/raw_geo/std": 0.2189686840759607, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1300.9375, "completions/mean_terminated_length": 1287.666748046875, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.7425485097019404, "frac_reward_zero_std": 0.0, "grad_norm": 3.3503534613820687, "kl": 0.02197265625, "learning_rate": 2.7006177477914913e-07, "loss": 0.0203, "num_tokens": 162529957.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7316157817840576, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0578766178804147, "rewards/wordcountpos_reward/raw_geo/std": 0.07691932641741059, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1125.4375, "completions/mean_terminated_length": 1125.4375, "completions/min_length": 855.0, "completions/min_terminated_length": 855.0, "epoch": 0.742748549709942, "frac_reward_zero_std": 0.0, "grad_norm": 2.6474213278471965, "kl": 0.010986328125, "learning_rate": 2.698158174560326e-07, "loss": -0.0132, "num_tokens": 162573708.0, "reward": 0.0, "reward_std": 0.8254045248031616, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07451385141293292, "rewards/wordcountpos_reward/raw_geo/std": 0.10006574242327902, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1293.0, "completions/max_terminated_length": 1293.0, "completions/mean_length": 1139.125, "completions/mean_terminated_length": 1139.125, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.7429485897179436, "frac_reward_zero_std": 0.0, "grad_norm": 2.6076298632371055, "kl": 0.0109405517578125, "learning_rate": 2.6956999675199573e-07, "loss": 0.0145, "num_tokens": 162610806.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9127312302589417, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03807652868927885, "rewards/wordcountpos_reward/raw_geo/std": 0.07168866358486951, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1210.0625, "completions/mean_terminated_length": 1190.7333984375, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.7431486297259452, "frac_reward_zero_std": 0.0, "grad_norm": 2.5537164652445083, "kl": 0.0148162841796875, "learning_rate": 2.693243127869019e-07, "loss": 0.0366, "num_tokens": 162662135.0, "reward": 0.0, "reward_std": 0.909473180770874, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05836761694014411, "rewards/wordcountpos_reward/raw_geo/std": 0.17469158566999612, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1106.6875, "completions/mean_terminated_length": 975.5833740234375, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.7433486697339468, "frac_reward_zero_std": 0.0, "grad_norm": 2.46822079760025, "kl": 0.014251708984375, "learning_rate": 2.6907876568054766e-07, "loss": 0.0152, "num_tokens": 162707986.0, "reward": 0.0, "reward_std": 0.8194214105606079, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01130749581024174, "rewards/wordcountpos_reward/raw_geo/std": 0.26592914666920164, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787746, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1148.4375, "completions/mean_terminated_length": 1148.4375, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.7435487097419484, "frac_reward_zero_std": 0.0, "grad_norm": 3.448118638895329, "kl": 0.01995849609375, "learning_rate": 2.6883335555266285e-07, "loss": 0.0222, "num_tokens": 162751297.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9900262355804443, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20008865939881068, "rewards/wordcountpos_reward/raw_geo/std": 0.09808587683228234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 937.625, "completions/mean_terminated_length": 937.625, "completions/min_length": 681.0, "completions/min_terminated_length": 681.0, "epoch": 0.74374874974995, "frac_reward_zero_std": 0.0, "grad_norm": 3.1297764151533527, "kl": 0.0150146484375, "learning_rate": 2.6858808252291063e-07, "loss": -0.0008, "num_tokens": 162790635.0, "reward": 0.0, "reward_std": 0.9614657163619995, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1381183760124471, "rewards/wordcountpos_reward/raw_geo/std": 0.08920747733793974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1327.375, "completions/mean_terminated_length": 1269.8333740234375, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.7439487897579516, "frac_reward_zero_std": 0.0, "grad_norm": 3.1654709411230484, "kl": 0.0178985595703125, "learning_rate": 2.6834294671088736e-07, "loss": 0.0111, "num_tokens": 162835601.0, "reward": 0.0, "reward_std": 0.6784833669662476, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10628433863100775, "rewards/wordcountpos_reward/raw_geo/std": 0.15079015496897644, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1220.1875, "completions/mean_terminated_length": 1052.300048828125, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.7441488297659532, "frac_reward_zero_std": 0.0, "grad_norm": 3.2798123679681392, "kl": 0.01995849609375, "learning_rate": 2.6809794823612213e-07, "loss": -0.0461, "num_tokens": 162888916.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7502108812332153, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23223010533075686, "rewards/wordcountpos_reward/raw_geo/std": 0.08831198825399883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 1122.875, "completions/mean_terminated_length": 1122.875, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.7443488697739548, "frac_reward_zero_std": 0.0, "grad_norm": 2.6526734117707087, "kl": 0.0146026611328125, "learning_rate": 2.678530872180774e-07, "loss": -0.0351, "num_tokens": 162925178.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8481924533843994, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03764520461764992, "rewards/wordcountpos_reward/raw_geo/std": 0.17162887474680855, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 979.4375, "completions/mean_terminated_length": 979.4375, "completions/min_length": 716.0, "completions/min_terminated_length": 716.0, "epoch": 0.7445489097819564, "frac_reward_zero_std": 0.0, "grad_norm": 3.7863224771716193, "kl": 0.027191162109375, "learning_rate": 2.676083637761486e-07, "loss": 0.0271, "num_tokens": 162975633.0, "reward": 0.0, "reward_std": 0.7541972398757935, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03347811024421338, "rewards/wordcountpos_reward/raw_geo/std": 0.06322284141069431, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1144.75, "completions/mean_terminated_length": 1094.0, "completions/min_length": 864.0, "completions/min_terminated_length": 864.0, "epoch": 0.744748949789958, "frac_reward_zero_std": 0.0, "grad_norm": 3.716036127186834, "kl": 0.02130126953125, "learning_rate": 2.67363778029664e-07, "loss": 0.0649, "num_tokens": 163026733.0, "reward": -1.4901161193847656e-08, "reward_std": 0.970742404460907, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.005965855696901362, "rewards/wordcountpos_reward/raw_geo/std": 0.12744157769081504, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1250.25, "completions/mean_terminated_length": 1233.60009765625, "completions/min_length": 1122.0, "completions/min_terminated_length": 1122.0, "epoch": 0.7449489897979595, "frac_reward_zero_std": 0.0, "grad_norm": 3.03448398475491, "kl": 0.0144195556640625, "learning_rate": 2.6711933009788443e-07, "loss": 0.0051, "num_tokens": 163066561.0, "reward": 5.960464477539063e-08, "reward_std": 0.7435988187789917, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05353619973700739, "rewards/wordcountpos_reward/raw_geo/std": 0.03380262352032307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1186.1875, "completions/mean_terminated_length": 1141.357177734375, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.7451490298059612, "frac_reward_zero_std": 0.0, "grad_norm": 2.8900211279702757, "kl": 0.019073486328125, "learning_rate": 2.668750201000044e-07, "loss": 0.055, "num_tokens": 163106260.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0603671073913574, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.028907204664087423, "rewards/wordcountpos_reward/raw_geo/std": 0.09382396714134054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1299.5625, "completions/mean_terminated_length": 1253.3077392578125, "completions/min_length": 1003.0, "completions/min_terminated_length": 1003.0, "epoch": 0.7453490698139628, "frac_reward_zero_std": 0.0, "grad_norm": 2.449955660108569, "kl": 0.014251708984375, "learning_rate": 2.6663084815514997e-07, "loss": 0.0025, "num_tokens": 163146789.0, "reward": 2.9802322387695312e-08, "reward_std": 0.1795145869255066, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05129094672892676, "rewards/wordcountpos_reward/raw_geo/std": 0.054283152768363846, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 1100.3125, "completions/mean_terminated_length": 1100.3125, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.7455491098219644, "frac_reward_zero_std": 0.0, "grad_norm": 3.4976850167103883, "kl": 0.019622802734375, "learning_rate": 2.663868143823809e-07, "loss": 0.0072, "num_tokens": 163198842.0, "reward": 0.0, "reward_std": 0.9998623728752136, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.055929163647706205, "rewards/wordcountpos_reward/raw_geo/std": 0.18533343604050415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1209.6875, "completions/mean_terminated_length": 1190.3333740234375, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.745749149829966, "frac_reward_zero_std": 0.0, "grad_norm": 3.090161672987841, "kl": 0.01837158203125, "learning_rate": 2.6614291890068864e-07, "loss": -0.0249, "num_tokens": 163239269.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9963809251785278, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02982339136301949, "rewards/wordcountpos_reward/raw_geo/std": 0.0723028121178648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 1093.625, "completions/mean_terminated_length": 1093.625, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.7459491898379675, "frac_reward_zero_std": 0.0, "grad_norm": 3.2150896850335533, "kl": 0.0135498046875, "learning_rate": 2.6589916182899844e-07, "loss": 0.0235, "num_tokens": 163278311.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5939237475395203, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06341226315959086, "rewards/wordcountpos_reward/raw_geo/std": 0.20336036391617734, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1354.8125, "completions/mean_terminated_length": 1288.8182373046875, "completions/min_length": 1084.0, "completions/min_terminated_length": 1084.0, "epoch": 0.7461492298459692, "frac_reward_zero_std": 0.0, "grad_norm": 3.2416995195132907, "kl": 0.018035888671875, "learning_rate": 2.6565554328616684e-07, "loss": -0.0002, "num_tokens": 163330420.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8304122686386108, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13369644367400133, "rewards/wordcountpos_reward/raw_geo/std": 0.12933564627586575, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0958393717904348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1158.0625, "completions/mean_terminated_length": 1158.0625, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.7463492698539708, "frac_reward_zero_std": 0.0, "grad_norm": 3.2835358544893563, "kl": 0.0179443359375, "learning_rate": 2.6541206339098354e-07, "loss": 0.0441, "num_tokens": 163383893.0, "reward": 0.0, "reward_std": 0.985384464263916, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.044021411991683024, "rewards/wordcountpos_reward/raw_geo/std": 0.10782235620061359, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1012.5, "completions/mean_terminated_length": 1012.5, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.7465493098619724, "frac_reward_zero_std": 0.0, "grad_norm": 2.97303810056267, "kl": 0.0150909423828125, "learning_rate": 2.6516872226217047e-07, "loss": -0.0329, "num_tokens": 163415477.0, "reward": -1.4901161193847656e-08, "reward_std": 0.969115674495697, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21710098915839338, "rewards/wordcountpos_reward/raw_geo/std": 0.3434056588517162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1177.9375, "completions/mean_terminated_length": 1177.9375, "completions/min_length": 1003.0, "completions/min_terminated_length": 1003.0, "epoch": 0.746749349869974, "frac_reward_zero_std": 0.0, "grad_norm": 3.4826446171818133, "kl": 0.026214599609375, "learning_rate": 2.6492552001838186e-07, "loss": -0.0144, "num_tokens": 163465236.0, "reward": 0.0, "reward_std": 0.8304725289344788, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05026601792713787, "rewards/wordcountpos_reward/raw_geo/std": 0.23383353407032859, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1243.6875, "completions/mean_terminated_length": 1226.60009765625, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.7469493898779755, "frac_reward_zero_std": 0.0, "grad_norm": 3.3148135561887004, "kl": 0.0198974609375, "learning_rate": 2.6468245677820414e-07, "loss": -0.0293, "num_tokens": 163517263.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7565702795982361, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014605732242984323, "rewards/wordcountpos_reward/raw_geo/std": 0.13243466198003742, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1214.4375, "completions/mean_terminated_length": 1214.4375, "completions/min_length": 1021.0, "completions/min_terminated_length": 1021.0, "epoch": 0.7471494298859772, "frac_reward_zero_std": 0.0, "grad_norm": 3.4657142259437355, "kl": 0.018341064453125, "learning_rate": 2.64439532660156e-07, "loss": 0.0206, "num_tokens": 163563134.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9066266417503357, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1928969773408834, "rewards/wordcountpos_reward/raw_geo/std": 0.206817369281202, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13743685418725538, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1189.9375, "completions/mean_terminated_length": 1189.9375, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.7473494698939788, "frac_reward_zero_std": 0.0, "grad_norm": 3.603764486319385, "kl": 0.019805908203125, "learning_rate": 2.641967477826885e-07, "loss": 0.0596, "num_tokens": 163598301.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8114109039306641, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.005328770247198906, "rewards/wordcountpos_reward/raw_geo/std": 0.12915882341142215, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1189.1875, "completions/mean_terminated_length": 1168.4666748046875, "completions/min_length": 1024.0, "completions/min_terminated_length": 1024.0, "epoch": 0.7475495099019804, "frac_reward_zero_std": 0.0, "grad_norm": 3.694333640573004, "kl": 0.020050048828125, "learning_rate": 2.639541022641847e-07, "loss": 0.0271, "num_tokens": 163642136.0, "reward": 0.0, "reward_std": 0.8656383156776428, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.013633127461466997, "rewards/wordcountpos_reward/raw_geo/std": 0.04011655471449004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13817594795257457, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1185.0, "completions/mean_length": 1261.0625, "completions/mean_terminated_length": 1022.125, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.747749549909982, "frac_reward_zero_std": 0.0, "grad_norm": 3.4940619950541527, "kl": 0.0166168212890625, "learning_rate": 2.637115962229594e-07, "loss": -0.0473, "num_tokens": 163689777.0, "reward": 0.0, "reward_std": 0.9598703384399414, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0193199581755619, "rewards/wordcountpos_reward/raw_geo/std": 0.13707976689694137, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15869840952317446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1051.5, "completions/mean_terminated_length": 1051.5, "completions/min_length": 717.0, "completions/min_terminated_length": 717.0, "epoch": 0.7479495899179835, "frac_reward_zero_std": 0.0, "grad_norm": 3.2611272479873263, "kl": 0.0165252685546875, "learning_rate": 2.6346922977725985e-07, "loss": 0.0161, "num_tokens": 163717905.0, "reward": 0.0, "reward_std": 0.6622365117073059, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15813434034145218, "rewards/wordcountpos_reward/raw_geo/std": 0.17537103491278389, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1243.125, "completions/mean_terminated_length": 1043.3333740234375, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.7481496299259852, "frac_reward_zero_std": 0.0, "grad_norm": 3.2133824357150313, "kl": 0.0181121826171875, "learning_rate": 2.632270030452649e-07, "loss": 0.0526, "num_tokens": 163774851.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7565584778785706, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10915540689999953, "rewards/wordcountpos_reward/raw_geo/std": 0.15338326040169056, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 964.5, "completions/mean_terminated_length": 964.5, "completions/min_length": 641.0, "completions/min_terminated_length": 641.0, "epoch": 0.7483496699339868, "frac_reward_zero_std": 0.0, "grad_norm": 3.479348901471219, "kl": 0.018707275390625, "learning_rate": 2.6298491614508566e-07, "loss": 0.0294, "num_tokens": 163810299.0, "reward": 0.0, "reward_std": 0.9581551551818848, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10063198699198585, "rewards/wordcountpos_reward/raw_geo/std": 0.0971494361506788, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1259.4375, "completions/mean_terminated_length": 1243.4000244140625, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.7485497099419884, "frac_reward_zero_std": 0.0, "grad_norm": 3.3013211707870775, "kl": 0.02056884765625, "learning_rate": 2.6274296919476443e-07, "loss": 0.0046, "num_tokens": 163861058.0, "reward": 0.0, "reward_std": 1.0052552223205566, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.036845473582724754, "rewards/wordcountpos_reward/raw_geo/std": 0.11905691757443214, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1117.125, "completions/mean_terminated_length": 1117.125, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.74874974994999, "frac_reward_zero_std": 0.0, "grad_norm": 3.6259150824405095, "kl": 0.022430419921875, "learning_rate": 2.625011623122758e-07, "loss": -0.0037, "num_tokens": 163909372.0, "reward": 0.0, "reward_std": 0.9045679569244385, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05380988600773626, "rewards/wordcountpos_reward/raw_geo/std": 0.10657365729500679, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1459.5, "completions/mean_terminated_length": 1407.4285888671875, "completions/min_length": 1279.0, "completions/min_terminated_length": 1279.0, "epoch": 0.7489497899579916, "frac_reward_zero_std": 0.0, "grad_norm": 2.470471057771461, "kl": 0.01128387451171875, "learning_rate": 2.6225949561552597e-07, "loss": -0.0157, "num_tokens": 163973924.0, "reward": -7.450580596923828e-09, "reward_std": 1.041560173034668, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.013461292832668764, "rewards/wordcountpos_reward/raw_geo/std": 0.10204811070365713, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1252.125, "completions/mean_terminated_length": 1216.71435546875, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.7491498299659932, "frac_reward_zero_std": 0.0, "grad_norm": 3.027000641063679, "kl": 0.016632080078125, "learning_rate": 2.620179692223529e-07, "loss": -0.0068, "num_tokens": 164029390.0, "reward": 0.0, "reward_std": 0.8252308368682861, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.026534474270561915, "rewards/wordcountpos_reward/raw_geo/std": 0.0379336034520484, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408157, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1176.125, "completions/mean_terminated_length": 1176.125, "completions/min_length": 1069.0, "completions/min_terminated_length": 1069.0, "epoch": 0.7493498699739948, "frac_reward_zero_std": 0.0, "grad_norm": 1.8752854078985517, "kl": 0.0073699951171875, "learning_rate": 2.617765832505253e-07, "loss": -0.0003, "num_tokens": 164069664.0, "reward": 0.0, "reward_std": 0.6116889715194702, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15382458903375062, "rewards/wordcountpos_reward/raw_geo/std": 0.10669017280464944, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1040.5625, "completions/mean_terminated_length": 1040.5625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.7495499099819964, "frac_reward_zero_std": 0.0, "grad_norm": 2.895698071069323, "kl": 0.0160980224609375, "learning_rate": 2.6153533781774485e-07, "loss": 0.014, "num_tokens": 164107585.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0326392650604248, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18663639668860627, "rewards/wordcountpos_reward/raw_geo/std": 0.2509171899438216, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1012.625, "completions/mean_terminated_length": 1012.625, "completions/min_length": 688.0, "completions/min_terminated_length": 688.0, "epoch": 0.749749949989998, "frac_reward_zero_std": 0.0, "grad_norm": 3.711852700315105, "kl": 0.0139923095703125, "learning_rate": 2.612942330416435e-07, "loss": 0.0289, "num_tokens": 164150979.0, "reward": 0.0, "reward_std": 0.9462813138961792, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2234163445047657, "rewards/wordcountpos_reward/raw_geo/std": 0.09182222720066663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 971.125, "completions/mean_terminated_length": 971.125, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.7499499899979996, "frac_reward_zero_std": 0.0, "grad_norm": 3.6189157001519963, "kl": 0.018768310546875, "learning_rate": 2.610532690397852e-07, "loss": -0.0333, "num_tokens": 164197373.0, "reward": 0.0, "reward_std": 0.4530988037586212, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1110698163323104, "rewards/wordcountpos_reward/raw_geo/std": 0.30004058269682704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.18170998464178714, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1233.0, "completions/mean_terminated_length": 1171.3846435546875, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.7501500300060012, "frac_reward_zero_std": 0.0, "grad_norm": 3.049555151426575, "kl": 0.01470947265625, "learning_rate": 2.6081244592966466e-07, "loss": 0.007, "num_tokens": 164251421.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0591976642608643, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013999681332679223, "rewards/wordcountpos_reward/raw_geo/std": 0.17600033342238228, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1034.8125, "completions/mean_terminated_length": 1003.800048828125, "completions/min_length": 700.0, "completions/min_terminated_length": 700.0, "epoch": 0.7503500700140028, "frac_reward_zero_std": 0.0, "grad_norm": 3.874266206412881, "kl": 0.0213623046875, "learning_rate": 2.6057176382870903e-07, "loss": 0.0146, "num_tokens": 164301474.0, "reward": 0.0, "reward_std": 0.7120554447174072, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13348553086594608, "rewards/wordcountpos_reward/raw_geo/std": 0.10733370756246478, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1189459883650901, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1014.625, "completions/mean_terminated_length": 1014.625, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.7505501100220044, "frac_reward_zero_std": 0.0, "grad_norm": 3.5928297063296815, "kl": 0.018524169921875, "learning_rate": 2.603312228542754e-07, "loss": -0.0299, "num_tokens": 164353508.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8588607311248779, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.038243667040053254, "rewards/wordcountpos_reward/raw_geo/std": 0.1872252456388012, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1152.9375, "completions/mean_terminated_length": 1129.800048828125, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.750750150030006, "frac_reward_zero_std": 0.0, "grad_norm": 3.1127685272473062, "kl": 0.017852783203125, "learning_rate": 2.6009082312365296e-07, "loss": -0.0262, "num_tokens": 164396483.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0433835983276367, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2520340814987514, "rewards/wordcountpos_reward/raw_geo/std": 0.16239391542653267, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 978.3125, "completions/mean_terminated_length": 978.3125, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.7509501900380076, "frac_reward_zero_std": 0.0, "grad_norm": 3.019656335710954, "kl": 0.0149383544921875, "learning_rate": 2.5985056475406163e-07, "loss": -0.0213, "num_tokens": 164439568.0, "reward": 0.0, "reward_std": 0.7147709131240845, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08205477157821468, "rewards/wordcountpos_reward/raw_geo/std": 0.11867397806072198, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1180.125, "completions/mean_terminated_length": 1158.800048828125, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.7511502300460092, "frac_reward_zero_std": 0.0, "grad_norm": 3.3043989032986842, "kl": 0.0167236328125, "learning_rate": 2.596104478626524e-07, "loss": -0.0178, "num_tokens": 164492506.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0203129053115845, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0533083401219386, "rewards/wordcountpos_reward/raw_geo/std": 0.12328485325123957, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1192.0, "completions/max_terminated_length": 1192.0, "completions/mean_length": 1059.0, "completions/mean_terminated_length": 1059.0, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.7513502700540108, "frac_reward_zero_std": 0.0, "grad_norm": 2.837059047755856, "kl": 0.0122528076171875, "learning_rate": 2.593704725665075e-07, "loss": -0.0141, "num_tokens": 164532442.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0004377365112305, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08294476161316262, "rewards/wordcountpos_reward/raw_geo/std": 0.3145780386524268, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1302.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1116.9375, "completions/mean_terminated_length": 1116.9375, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.7515503100620124, "frac_reward_zero_std": 0.0, "grad_norm": 3.551436795193962, "kl": 0.0205078125, "learning_rate": 2.5913063898263975e-07, "loss": -0.0229, "num_tokens": 164572809.0, "reward": 0.0, "reward_std": 0.6533830165863037, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07455925227724557, "rewards/wordcountpos_reward/raw_geo/std": 0.07251975889134407, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1267.1875, "completions/mean_terminated_length": 1267.1875, "completions/min_length": 1122.0, "completions/min_terminated_length": 1122.0, "epoch": 0.751750350070014, "frac_reward_zero_std": 0.0, "grad_norm": 2.873974951827714, "kl": 0.02032470703125, "learning_rate": 2.5889094722799343e-07, "loss": -0.0358, "num_tokens": 164620900.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0299146175384521, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0048641145054367455, "rewards/wordcountpos_reward/raw_geo/std": 0.10025216443807772, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1193.375, "completions/mean_terminated_length": 1172.933349609375, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.7519503900780156, "frac_reward_zero_std": 0.0, "grad_norm": 2.457306479468301, "kl": 0.0119781494140625, "learning_rate": 2.58651397419443e-07, "loss": -0.0391, "num_tokens": 164669346.0, "reward": 0.0, "reward_std": 0.8900537490844727, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1036501687945671, "rewards/wordcountpos_reward/raw_geo/std": 0.10797476121067498, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 976.25, "completions/mean_terminated_length": 976.25, "completions/min_length": 722.0, "completions/min_terminated_length": 722.0, "epoch": 0.7521504300860172, "frac_reward_zero_std": 0.0, "grad_norm": 3.412774902895379, "kl": 0.017425537109375, "learning_rate": 2.584119896737942e-07, "loss": -0.0516, "num_tokens": 164704534.0, "reward": 0.0, "reward_std": 0.9832000732421875, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01662828966635526, "rewards/wordcountpos_reward/raw_geo/std": 0.05311828140464849, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 1165.0625, "completions/mean_terminated_length": 1012.8182373046875, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.7523504700940188, "frac_reward_zero_std": 0.0, "grad_norm": 2.658805729399333, "kl": 0.01113128662109375, "learning_rate": 2.5817272410778327e-07, "loss": -0.0768, "num_tokens": 164739135.0, "reward": 0.0, "reward_std": 0.8352234363555908, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03803430486743088, "rewards/wordcountpos_reward/raw_geo/std": 0.07915111399811843, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1134.3125, "completions/mean_terminated_length": 1134.3125, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.7525505101020205, "frac_reward_zero_std": 0.0, "grad_norm": 2.6316766178420408, "kl": 0.0125885009765625, "learning_rate": 2.579336008380774e-07, "loss": -0.0114, "num_tokens": 164783084.0, "reward": 0.0, "reward_std": 0.7150232791900635, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07173653057883275, "rewards/wordcountpos_reward/raw_geo/std": 0.07001827401594192, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 900.3125, "completions/mean_terminated_length": 900.3125, "completions/min_length": 694.0, "completions/min_terminated_length": 694.0, "epoch": 0.752750550110022, "frac_reward_zero_std": 0.0, "grad_norm": 2.5461661271890716, "kl": 0.0119171142578125, "learning_rate": 2.5769461998127385e-07, "loss": -0.0242, "num_tokens": 164822505.0, "reward": 0.0, "reward_std": 0.9319257736206055, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.008155898777836762, "rewards/wordcountpos_reward/raw_geo/std": 0.08113228787756802, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12988598989256067, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1093.3125, "completions/mean_terminated_length": 1093.3125, "completions/min_length": 938.0, "completions/min_terminated_length": 938.0, "epoch": 0.7529505901180236, "frac_reward_zero_std": 0.0, "grad_norm": 2.573877587783675, "kl": 0.015716552734375, "learning_rate": 2.5745578165390094e-07, "loss": 0.0031, "num_tokens": 164857862.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9293373823165894, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03215889247103518, "rewards/wordcountpos_reward/raw_geo/std": 0.11775119051854645, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1290.0, "completions/max_terminated_length": 1290.0, "completions/mean_length": 1022.8125, "completions/mean_terminated_length": 1022.8125, "completions/min_length": 761.0, "completions/min_terminated_length": 761.0, "epoch": 0.7531506301260252, "frac_reward_zero_std": 0.0, "grad_norm": 2.9360323342165167, "kl": 0.01717376708984375, "learning_rate": 2.572170859724173e-07, "loss": -0.029, "num_tokens": 164910523.0, "reward": 0.0, "reward_std": 0.7384215593338013, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.014479771698462193, "rewards/wordcountpos_reward/raw_geo/std": 0.12042370477904688, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1391.25, "completions/mean_terminated_length": 1306.6666259765625, "completions/min_length": 1110.0, "completions/min_terminated_length": 1110.0, "epoch": 0.7533506701340268, "frac_reward_zero_std": 0.0, "grad_norm": 2.637035828701996, "kl": 0.0127410888671875, "learning_rate": 2.569785330532123e-07, "loss": 0.0188, "num_tokens": 164966103.0, "reward": 0.0, "reward_std": 0.8677883148193359, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14848112418457823, "rewards/wordcountpos_reward/raw_geo/std": 0.08219770230624432, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.16727666149669979, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1328.0, "completions/max_terminated_length": 1328.0, "completions/mean_length": 1014.6875, "completions/mean_terminated_length": 1014.6875, "completions/min_length": 696.0, "completions/min_terminated_length": 696.0, "epoch": 0.7535507101420285, "frac_reward_zero_std": 0.0, "grad_norm": 3.1637680010493843, "kl": 0.015411376953125, "learning_rate": 2.5674012301260483e-07, "loss": 0.009, "num_tokens": 164997874.0, "reward": -7.450580596923828e-09, "reward_std": 1.0361754894256592, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.13598311629326254, "rewards/wordcountpos_reward/raw_geo/std": 0.17142205170123292, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1030.5, "completions/mean_terminated_length": 1030.5, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.75375075015003, "frac_reward_zero_std": 0.0, "grad_norm": 2.8992559564432843, "kl": 0.01556396484375, "learning_rate": 2.5650185596684513e-07, "loss": 0.0046, "num_tokens": 165042346.0, "reward": 2.9802322387695312e-08, "reward_std": 0.894347608089447, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05045941900279602, "rewards/wordcountpos_reward/raw_geo/std": 0.09282973090243453, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1060.0, "completions/max_terminated_length": 1060.0, "completions/mean_length": 920.3125, "completions/mean_terminated_length": 920.3125, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.7539507901580316, "frac_reward_zero_std": 0.0, "grad_norm": 3.9621186696523916, "kl": 0.02056884765625, "learning_rate": 2.562637320321132e-07, "loss": -0.0006, "num_tokens": 165090551.0, "reward": 3.725290298461914e-08, "reward_std": 1.0410411357879639, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027787030524120623, "rewards/wordcountpos_reward/raw_geo/std": 0.07302789561683437, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1106.125, "completions/mean_terminated_length": 1106.125, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.7541508301660332, "frac_reward_zero_std": 0.0, "grad_norm": 3.3256239657929223, "kl": 0.0155487060546875, "learning_rate": 2.5602575132451954e-07, "loss": -0.0693, "num_tokens": 165132345.0, "reward": -7.450580596923828e-09, "reward_std": 1.0127053260803223, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.11073235802041773, "rewards/wordcountpos_reward/raw_geo/std": 0.07464727177670273, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185827, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1151.875, "completions/mean_terminated_length": 1151.875, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.7543508701740348, "frac_reward_zero_std": 0.0, "grad_norm": 3.3628712029930536, "kl": 0.017669677734375, "learning_rate": 2.557879139601041e-07, "loss": -0.0511, "num_tokens": 165173599.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0015623569488525, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012770933401978504, "rewards/wordcountpos_reward/raw_geo/std": 0.0660900666299453, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1161.625, "completions/mean_terminated_length": 1161.625, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.7545509101820365, "frac_reward_zero_std": 0.0, "grad_norm": 3.364991157705002, "kl": 0.019195556640625, "learning_rate": 2.555502200548381e-07, "loss": -0.0408, "num_tokens": 165217785.0, "reward": 0.0, "reward_std": 0.7060739398002625, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02466542166171348, "rewards/wordcountpos_reward/raw_geo/std": 0.045987885076765996, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1229.0, "completions/max_terminated_length": 1229.0, "completions/mean_length": 1029.4375, "completions/mean_terminated_length": 1029.4375, "completions/min_length": 669.0, "completions/min_terminated_length": 669.0, "epoch": 0.754750950190038, "frac_reward_zero_std": 0.0, "grad_norm": 3.5020307406739337, "kl": 0.023162841796875, "learning_rate": 2.553126697246217e-07, "loss": 0.0012, "num_tokens": 165267528.0, "reward": -3.725290298461914e-09, "reward_std": 1.0575177669525146, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.03187373914451072, "rewards/wordcountpos_reward/raw_geo/std": 0.14255414872012656, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1145.6875, "completions/mean_terminated_length": 1095.071533203125, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.7549509901980396, "frac_reward_zero_std": 0.0, "grad_norm": 3.058111799150252, "kl": 0.0140533447265625, "learning_rate": 2.5507526308528595e-07, "loss": -0.009, "num_tokens": 165306283.0, "reward": 0.0, "reward_std": 1.0484849214553833, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03990419089899134, "rewards/wordcountpos_reward/raw_geo/std": 0.04731827558465806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1245.0, "completions/max_terminated_length": 1245.0, "completions/mean_length": 995.5, "completions/mean_terminated_length": 995.5, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.7551510302060412, "frac_reward_zero_std": 0.0, "grad_norm": 3.554705484181012, "kl": 0.021392822265625, "learning_rate": 2.548380002525907e-07, "loss": 0.0047, "num_tokens": 165355571.0, "reward": 0.0, "reward_std": 0.9163424968719482, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11670163565536112, "rewards/wordcountpos_reward/raw_geo/std": 0.085386284255653, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1036.5, "completions/mean_terminated_length": 1005.6000366210938, "completions/min_length": 571.0, "completions/min_terminated_length": 571.0, "epoch": 0.7553510702140428, "frac_reward_zero_std": 0.0, "grad_norm": 3.7887870092775566, "kl": 0.022674560546875, "learning_rate": 2.5460088134222723e-07, "loss": -0.0197, "num_tokens": 165400555.0, "reward": 2.9802322387695312e-08, "reward_std": 1.042341947555542, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.20064199144517048, "rewards/wordcountpos_reward/raw_geo/std": 0.13265813796794865, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.13045504405165223, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 893.0, "completions/mean_terminated_length": 893.0, "completions/min_length": 591.0, "completions/min_terminated_length": 591.0, "epoch": 0.7555511102220445, "frac_reward_zero_std": 0.0, "grad_norm": 4.223906123873375, "kl": 0.022857666015625, "learning_rate": 2.543639064698152e-07, "loss": -0.0335, "num_tokens": 165431811.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9586639404296875, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03656483289389001, "rewards/wordcountpos_reward/raw_geo/std": 0.079214828520837, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1228.5625, "completions/mean_terminated_length": 1210.4666748046875, "completions/min_length": 1003.0, "completions/min_terminated_length": 1003.0, "epoch": 0.755751150230046, "frac_reward_zero_std": 0.0, "grad_norm": 3.2416640320931163, "kl": 0.020660400390625, "learning_rate": 2.5412707575090486e-07, "loss": -0.0227, "num_tokens": 165482916.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0346360206604004, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06792094967302686, "rewards/wordcountpos_reward/raw_geo/std": 0.12421061050763976, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1053.8125, "completions/mean_terminated_length": 1053.8125, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.7559511902380476, "frac_reward_zero_std": 0.0, "grad_norm": 3.0927356920060305, "kl": 0.016387939453125, "learning_rate": 2.5389038930097594e-07, "loss": -0.0134, "num_tokens": 165517441.0, "reward": 0.0, "reward_std": 0.7463462352752686, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1324383269489905, "rewards/wordcountpos_reward/raw_geo/std": 0.264982409324867, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1024.375, "completions/mean_terminated_length": 1024.375, "completions/min_length": 748.0, "completions/min_terminated_length": 748.0, "epoch": 0.7561512302460492, "frac_reward_zero_std": 0.0, "grad_norm": 3.6296320932429653, "kl": 0.02001953125, "learning_rate": 2.5365384723543787e-07, "loss": 0.0005, "num_tokens": 165558967.0, "reward": -5.960464477539063e-08, "reward_std": 0.8933889865875244, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09118178820638533, "rewards/wordcountpos_reward/raw_geo/std": 0.28392648992514374, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1230.6875, "completions/mean_terminated_length": 1212.7333984375, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.7563512702540508, "frac_reward_zero_std": 0.0, "grad_norm": 2.9430849450568566, "kl": 0.0148162841796875, "learning_rate": 2.5341744966962943e-07, "loss": -0.0491, "num_tokens": 165599370.0, "reward": -1.1175870895385742e-08, "reward_std": 1.060374140739441, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024681204096416578, "rewards/wordcountpos_reward/raw_geo/std": 0.1903967755865126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202952, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1195.0, "completions/mean_length": 978.375, "completions/mean_terminated_length": 943.6000366210938, "completions/min_length": 639.0, "completions/min_terminated_length": 639.0, "epoch": 0.7565513102620524, "frac_reward_zero_std": 0.0, "grad_norm": 3.5695246459015713, "kl": 0.01824951171875, "learning_rate": 2.531811967188193e-07, "loss": 0.012, "num_tokens": 165634680.0, "reward": 0.0, "reward_std": 0.8934910297393799, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.25803963027673116, "rewards/wordcountpos_reward/raw_geo/std": 0.15707946764171132, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12285191326386657, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1249.0625, "completions/mean_terminated_length": 1232.3333740234375, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.756751350270054, "frac_reward_zero_std": 0.0, "grad_norm": 2.6157829636110437, "kl": 0.011322021484375, "learning_rate": 2.5294508849820553e-07, "loss": -0.0388, "num_tokens": 165678897.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0652118921279907, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2365588628686003, "rewards/wordcountpos_reward/raw_geo/std": 0.14264045677881676, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746354, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1093.8125, "completions/mean_terminated_length": 1093.8125, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.7569513902780556, "frac_reward_zero_std": 0.0, "grad_norm": 3.253321322551815, "kl": 0.018707275390625, "learning_rate": 2.5270912512291564e-07, "loss": -0.0434, "num_tokens": 165729542.0, "reward": 0.0, "reward_std": 0.9060355424880981, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0741560201446335, "rewards/wordcountpos_reward/raw_geo/std": 0.12263408908202894, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1002773930432755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 1101.5, "completions/mean_terminated_length": 1101.5, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.7571514302860572, "frac_reward_zero_std": 0.0, "grad_norm": 3.3288035108610616, "kl": 0.018829345703125, "learning_rate": 2.5247330670800616e-07, "loss": -0.0145, "num_tokens": 165762230.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0312753915786743, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13803865288112876, "rewards/wordcountpos_reward/raw_geo/std": 0.061169339338409554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1165.0, "completions/mean_terminated_length": 1165.0, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.7573514702940588, "frac_reward_zero_std": 0.0, "grad_norm": 3.6616030796750887, "kl": 0.0196533203125, "learning_rate": 2.5223763336846333e-07, "loss": 0.024, "num_tokens": 165805390.0, "reward": 0.0, "reward_std": 0.8859190940856934, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07545885495398222, "rewards/wordcountpos_reward/raw_geo/std": 0.09930043790786373, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1157.0, "completions/mean_terminated_length": 1134.1334228515625, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.7575515103020604, "frac_reward_zero_std": 0.0, "grad_norm": 3.4208233173076854, "kl": 0.02154541015625, "learning_rate": 2.5200210521920264e-07, "loss": 0.0245, "num_tokens": 165856742.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9524872899055481, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21749751928010141, "rewards/wordcountpos_reward/raw_geo/std": 0.08007609223685212, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1153.625, "completions/mean_terminated_length": 1153.625, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.757751550310062, "frac_reward_zero_std": 0.0, "grad_norm": 2.8326526626600295, "kl": 0.0138702392578125, "learning_rate": 2.517667223750687e-07, "loss": 0.0046, "num_tokens": 165902560.0, "reward": 5.960464477539063e-08, "reward_std": 0.9098324775695801, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18895266959040188, "rewards/wordcountpos_reward/raw_geo/std": 0.06948865462297853, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 1039.875, "completions/mean_terminated_length": 1039.875, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.7579515903180636, "frac_reward_zero_std": 0.0, "grad_norm": 3.631354466861727, "kl": 0.020721435546875, "learning_rate": 2.5153148495083505e-07, "loss": -0.0453, "num_tokens": 165946582.0, "reward": 0.0, "reward_std": 1.0609780550003052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04146880173842385, "rewards/wordcountpos_reward/raw_geo/std": 0.04161429759772146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1207.6875, "completions/mean_terminated_length": 1188.2000732421875, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.7581516303260653, "frac_reward_zero_std": 0.0, "grad_norm": 2.9384099930832366, "kl": 0.019866943359375, "learning_rate": 2.5129639306120467e-07, "loss": -0.0681, "num_tokens": 166000345.0, "reward": 0.0, "reward_std": 0.9133108854293823, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.055462438073150315, "rewards/wordcountpos_reward/raw_geo/std": 0.11982033336055446, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1060.875, "completions/mean_terminated_length": 1060.875, "completions/min_length": 618.0, "completions/min_terminated_length": 618.0, "epoch": 0.7583516703340668, "frac_reward_zero_std": 0.0, "grad_norm": 4.104979404055139, "kl": 0.021575927734375, "learning_rate": 2.5106144682080937e-07, "loss": -0.0332, "num_tokens": 166049127.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0470837354660034, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010096963582141634, "rewards/wordcountpos_reward/raw_geo/std": 0.03882990002054737, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1096.625, "completions/mean_terminated_length": 1096.625, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.7585517103420684, "frac_reward_zero_std": 0.0, "grad_norm": 3.439764871343131, "kl": 0.023406982421875, "learning_rate": 2.5082664634421025e-07, "loss": -0.0301, "num_tokens": 166092705.0, "reward": 0.0, "reward_std": 0.7773227095603943, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11006259615170653, "rewards/wordcountpos_reward/raw_geo/std": 0.05526804094817046, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 914.0, "completions/mean_terminated_length": 914.0, "completions/min_length": 504.0, "completions/min_terminated_length": 504.0, "epoch": 0.75875175035007, "frac_reward_zero_std": 0.0, "grad_norm": 3.7274789681586475, "kl": 0.017608642578125, "learning_rate": 2.505919917458966e-07, "loss": -0.0157, "num_tokens": 166127857.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9609361886978149, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05323715965282892, "rewards/wordcountpos_reward/raw_geo/std": 0.04916987686138914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1155.9375, "completions/mean_terminated_length": 1155.9375, "completions/min_length": 867.0, "completions/min_terminated_length": 867.0, "epoch": 0.7589517903580716, "frac_reward_zero_std": 0.0, "grad_norm": 2.5220781310597054, "kl": 0.01385498046875, "learning_rate": 2.503574831402876e-07, "loss": 0.0177, "num_tokens": 166169320.0, "reward": -3.725290298461914e-08, "reward_std": 1.0445029735565186, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007318558414976285, "rewards/wordcountpos_reward/raw_geo/std": 0.14443498631170207, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 1062.4375, "completions/mean_terminated_length": 1062.4375, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.7591518303660733, "frac_reward_zero_std": 0.0, "grad_norm": 3.538793061747457, "kl": 0.0188751220703125, "learning_rate": 2.501231206417304e-07, "loss": 0.0127, "num_tokens": 166201543.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7677043080329895, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014642257357822347, "rewards/wordcountpos_reward/raw_geo/std": 0.15500208818994834, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1161.8125, "completions/mean_terminated_length": 1161.8125, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.7593518703740748, "frac_reward_zero_std": 0.0, "grad_norm": 3.4969752831119614, "kl": 0.0177001953125, "learning_rate": 2.4988890436450143e-07, "loss": 0.0127, "num_tokens": 166242852.0, "reward": 0.0, "reward_std": 0.555396318435669, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10236613695693225, "rewards/wordcountpos_reward/raw_geo/std": 0.06247296265037729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1125.25, "completions/mean_terminated_length": 1125.25, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.7595519103820764, "frac_reward_zero_std": 0.0, "grad_norm": 2.8950550590899247, "kl": 0.0138702392578125, "learning_rate": 2.496548344228051e-07, "loss": -0.0175, "num_tokens": 166282120.0, "reward": 0.0, "reward_std": 0.8230171203613281, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17280568666386098, "rewards/wordcountpos_reward/raw_geo/std": 0.14206387730373393, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.16865480854231357, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1154.5625, "completions/mean_terminated_length": 1154.5625, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.759751950390078, "frac_reward_zero_std": 0.0, "grad_norm": 2.0619376288166715, "kl": 0.0130462646484375, "learning_rate": 2.4942091093077564e-07, "loss": -0.0419, "num_tokens": 166333321.0, "reward": 0.0, "reward_std": 0.950013279914856, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13311331591690628, "rewards/wordcountpos_reward/raw_geo/std": 0.09733858877064576, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1454.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1296.75, "completions/mean_terminated_length": 1296.75, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.7599519903980796, "frac_reward_zero_std": 0.0, "grad_norm": 2.890107780054847, "kl": 0.0155792236328125, "learning_rate": 2.491871340024748e-07, "loss": 0.0076, "num_tokens": 166379277.0, "reward": 7.450580596923828e-09, "reward_std": 1.0195438861846924, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.005632385192335406, "rewards/wordcountpos_reward/raw_geo/std": 0.10842730920613491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1184.875, "completions/mean_terminated_length": 1184.875, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.7601520304060813, "frac_reward_zero_std": 0.0, "grad_norm": 3.2785800915296597, "kl": 0.0230712890625, "learning_rate": 2.4895350375189343e-07, "loss": -0.0508, "num_tokens": 166420667.0, "reward": 7.450580596923828e-09, "reward_std": 1.0096807479858398, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.09644334308782594, "rewards/wordcountpos_reward/raw_geo/std": 0.0890242951494828, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1321.125, "completions/mean_terminated_length": 1261.5, "completions/min_length": 1088.0, "completions/min_terminated_length": 1088.0, "epoch": 0.7603520704140828, "frac_reward_zero_std": 0.0, "grad_norm": 3.1595734460629448, "kl": 0.01947021484375, "learning_rate": 2.4872002029295057e-07, "loss": -0.0591, "num_tokens": 166466941.0, "reward": 0.0, "reward_std": 0.7830872535705566, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.238086379402748, "rewards/wordcountpos_reward/raw_geo/std": 0.2308289333294637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 1201.875, "completions/mean_terminated_length": 1066.3636474609375, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.7605521104220844, "frac_reward_zero_std": 0.0, "grad_norm": 3.0432340213195554, "kl": 0.016326904296875, "learning_rate": 2.4848668373949413e-07, "loss": -0.0077, "num_tokens": 166509259.0, "reward": 0.0, "reward_std": 0.6575754284858704, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1792599446217895, "rewards/wordcountpos_reward/raw_geo/std": 0.14585166751462156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1303.875, "completions/mean_terminated_length": 1238.5, "completions/min_length": 1009.0, "completions/min_terminated_length": 1009.0, "epoch": 0.760752150430086, "frac_reward_zero_std": 0.0, "grad_norm": 2.6302271785049225, "kl": 0.01275634765625, "learning_rate": 2.4825349420529965e-07, "loss": -0.0082, "num_tokens": 166552945.0, "reward": -4.470348358154297e-08, "reward_std": 1.0600529909133911, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08188395916224915, "rewards/wordcountpos_reward/raw_geo/std": 0.07153824100778484, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1138.9375, "completions/mean_terminated_length": 1114.86669921875, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.7609521904380876, "frac_reward_zero_std": 0.0, "grad_norm": 3.1846954696520218, "kl": 0.019012451171875, "learning_rate": 2.4802045180407156e-07, "loss": -0.0857, "num_tokens": 166597216.0, "reward": 2.9802322387695312e-08, "reward_std": 0.692024290561676, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2016911494912371, "rewards/wordcountpos_reward/raw_geo/std": 0.20225353037009644, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1232.625, "completions/mean_terminated_length": 1111.0909423828125, "completions/min_length": 783.0, "completions/min_terminated_length": 783.0, "epoch": 0.7611522304460893, "frac_reward_zero_std": 0.0, "grad_norm": 3.1337700253648046, "kl": 0.020263671875, "learning_rate": 2.477875566494425e-07, "loss": -0.0295, "num_tokens": 166642114.0, "reward": 0.0, "reward_std": 0.8812708258628845, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.024878972304062094, "rewards/wordcountpos_reward/raw_geo/std": 0.0574664632407888, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1204.1875, "completions/mean_terminated_length": 1161.9285888671875, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.7613522704540908, "frac_reward_zero_std": 0.0, "grad_norm": 2.8429600748207884, "kl": 0.0167083740234375, "learning_rate": 2.4755480885497323e-07, "loss": 0.0063, "num_tokens": 166697701.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7306531071662903, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01649984629215557, "rewards/wordcountpos_reward/raw_geo/std": 0.13940547847474707, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.131021626713557, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1379.875, "completions/mean_terminated_length": 1325.2728271484375, "completions/min_length": 1042.0, "completions/min_terminated_length": 1042.0, "epoch": 0.7615523104620924, "frac_reward_zero_std": 0.0, "grad_norm": 2.78913618212351, "kl": 0.014739990234375, "learning_rate": 2.473222085341524e-07, "loss": 0.003, "num_tokens": 166746083.0, "reward": -3.725290298461914e-08, "reward_std": 1.0617139339447021, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.003619274925510811, "rewards/wordcountpos_reward/raw_geo/std": 0.06588774359684185, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101763, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1331.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1067.4375, "completions/mean_terminated_length": 1067.4375, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.761752350470094, "frac_reward_zero_std": 0.0, "grad_norm": 3.7846164556613093, "kl": 0.026702880859375, "learning_rate": 2.4708975580039715e-07, "loss": -0.0399, "num_tokens": 166779450.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8831357955932617, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.018508262114733436, "rewards/wordcountpos_reward/raw_geo/std": 0.02570043490197555, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1316.25, "completions/mean_terminated_length": 1304.0001220703125, "completions/min_length": 1149.0, "completions/min_terminated_length": 1149.0, "epoch": 0.7619523904780956, "frac_reward_zero_std": 0.0, "grad_norm": 1.2084473127466109, "kl": 0.00591278076171875, "learning_rate": 2.468574507670526e-07, "loss": -0.0034, "num_tokens": 166820990.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6615856885910034, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12481851231094615, "rewards/wordcountpos_reward/raw_geo/std": 0.1263222634542389, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1132.75, "completions/mean_terminated_length": 1132.75, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.7621524304860973, "frac_reward_zero_std": 0.0, "grad_norm": 2.955528268753257, "kl": 0.017822265625, "learning_rate": 2.4662529354739145e-07, "loss": -0.0049, "num_tokens": 166865290.0, "reward": 0.0, "reward_std": 0.5360229015350342, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10476033784200534, "rewards/wordcountpos_reward/raw_geo/std": 0.09821235064714214, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1202.5, "completions/mean_terminated_length": 1133.84619140625, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.7623524704940988, "frac_reward_zero_std": 0.0, "grad_norm": 3.5927896336700456, "kl": 0.02099609375, "learning_rate": 2.4639328425461474e-07, "loss": 0.0118, "num_tokens": 166910986.0, "reward": -2.9802322387695312e-08, "reward_std": 0.895727276802063, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15073958127561293, "rewards/wordcountpos_reward/raw_geo/std": 0.25880846138294966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036263, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1203.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 1097.5, "completions/mean_terminated_length": 1097.5, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.7625525105021004, "frac_reward_zero_std": 0.0, "grad_norm": 3.576589746132569, "kl": 0.018798828125, "learning_rate": 2.4616142300185125e-07, "loss": -0.0379, "num_tokens": 166961354.0, "reward": 0.0, "reward_std": 0.9495494961738586, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01339682335155514, "rewards/wordcountpos_reward/raw_geo/std": 0.05974154488995871, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 988.4375, "completions/mean_terminated_length": 988.4375, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.762752550510102, "frac_reward_zero_std": 0.0, "grad_norm": 3.744780977664131, "kl": 0.02056884765625, "learning_rate": 2.459297099021578e-07, "loss": -0.0469, "num_tokens": 167006905.0, "reward": 0.0, "reward_std": 0.7529932260513306, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02813145294217563, "rewards/wordcountpos_reward/raw_geo/std": 0.035879486214495514, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1225.5625, "completions/mean_terminated_length": 1186.357177734375, "completions/min_length": 954.0, "completions/min_terminated_length": 954.0, "epoch": 0.7629525905181036, "frac_reward_zero_std": 0.0, "grad_norm": 2.5476440003843406, "kl": 0.0130767822265625, "learning_rate": 2.4569814506851843e-07, "loss": 0.0083, "num_tokens": 167057978.0, "reward": -2.60770320892334e-08, "reward_std": 1.0489267110824585, "rewards/wordcountpos_reward/mean": -2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03741033631983366, "rewards/wordcountpos_reward/raw_geo/std": 0.08685303305948916, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1293.0, "completions/mean_length": 1108.0, "completions/mean_terminated_length": 1081.86669921875, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.7631526305261053, "frac_reward_zero_std": 0.0, "grad_norm": 2.5195699299068743, "kl": 0.0131988525390625, "learning_rate": 2.4546672861384523e-07, "loss": -0.0211, "num_tokens": 167098834.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9445218443870544, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03221221785470853, "rewards/wordcountpos_reward/raw_geo/std": 0.1627014408653895, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1287.875, "completions/mean_terminated_length": 1122.888916015625, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.7633526705341068, "frac_reward_zero_std": 0.0, "grad_norm": 2.659894860732581, "kl": 0.0129547119140625, "learning_rate": 2.452354606509781e-07, "loss": 0.0337, "num_tokens": 167149960.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0093024969100952, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15823195044071756, "rewards/wordcountpos_reward/raw_geo/std": 0.14906539198810295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1071.75, "completions/mean_terminated_length": 1071.75, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.7635527105421084, "frac_reward_zero_std": 0.0, "grad_norm": 3.321573927147833, "kl": 0.016754150390625, "learning_rate": 2.4500434129268446e-07, "loss": -0.0301, "num_tokens": 167185844.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8061005473136902, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05486538325566955, "rewards/wordcountpos_reward/raw_geo/std": 0.13215395747047004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1072.25, "completions/mean_terminated_length": 1072.25, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.76375275055011, "frac_reward_zero_std": 0.0, "grad_norm": 3.522978706085106, "kl": 0.020172119140625, "learning_rate": 2.4477337065165864e-07, "loss": 0.0128, "num_tokens": 167236288.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9675652384757996, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12554741302795444, "rewards/wordcountpos_reward/raw_geo/std": 0.1970107515566271, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 954.625, "completions/mean_terminated_length": 954.625, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.7639527905581116, "frac_reward_zero_std": 0.0, "grad_norm": 3.5517673095807702, "kl": 0.01788330078125, "learning_rate": 2.4454254884052347e-07, "loss": 0.029, "num_tokens": 167281122.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8358907699584961, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.031970826080959344, "rewards/wordcountpos_reward/raw_geo/std": 0.11098176416476135, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1274.25, "completions/mean_terminated_length": 1222.1539306640625, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.7641528305661133, "frac_reward_zero_std": 0.0, "grad_norm": 3.1326651570859045, "kl": 0.0156097412109375, "learning_rate": 2.4431187597182845e-07, "loss": -0.0095, "num_tokens": 167318174.0, "reward": -7.450580596923828e-09, "reward_std": 0.9413160085678101, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.02656657450443457, "rewards/wordcountpos_reward/raw_geo/std": 0.09041648784674224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1206.0, "completions/mean_terminated_length": 1186.4000244140625, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.7643528705741148, "frac_reward_zero_std": 0.0, "grad_norm": 3.1635803296128966, "kl": 0.01812744140625, "learning_rate": 2.44081352158051e-07, "loss": -0.0086, "num_tokens": 167364286.0, "reward": 0.0, "reward_std": 0.9254043102264404, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.078951617794499, "rewards/wordcountpos_reward/raw_geo/std": 0.18410910583147153, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1182.125, "completions/mean_terminated_length": 1076.166748046875, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.7645529105821164, "frac_reward_zero_std": 0.0, "grad_norm": 3.5278571632844256, "kl": 0.0156402587890625, "learning_rate": 2.43850977511595e-07, "loss": 0.0514, "num_tokens": 167409384.0, "reward": 0.0, "reward_std": 0.8905699849128723, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06524949556756653, "rewards/wordcountpos_reward/raw_geo/std": 0.0863648637209229, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1152.625, "completions/mean_terminated_length": 1152.625, "completions/min_length": 1061.0, "completions/min_terminated_length": 1061.0, "epoch": 0.764752950590118, "frac_reward_zero_std": 0.0, "grad_norm": 3.030443241212008, "kl": 0.0165252685546875, "learning_rate": 2.4362075214479296e-07, "loss": 0.0161, "num_tokens": 167451226.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7893909215927124, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1359948402203186, "rewards/wordcountpos_reward/raw_geo/std": 0.3440864751410149, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1392.9375, "completions/mean_terminated_length": 1309.6666259765625, "completions/min_length": 1166.0, "completions/min_terminated_length": 1166.0, "epoch": 0.7649529905981196, "frac_reward_zero_std": 0.0, "grad_norm": 2.2912470414895694, "kl": 0.0131072998046875, "learning_rate": 2.433906761699032e-07, "loss": 0.0207, "num_tokens": 167499233.0, "reward": 0.0, "reward_std": 0.6182544231414795, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12235082987880296, "rewards/wordcountpos_reward/raw_geo/std": 0.24525855601550636, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1328.0, "completions/mean_length": 1228.125, "completions/mean_terminated_length": 1104.5455322265625, "completions/min_length": 756.0, "completions/min_terminated_length": 756.0, "epoch": 0.7651530306061213, "frac_reward_zero_std": 0.0, "grad_norm": 3.2498984490150264, "kl": 0.0181884765625, "learning_rate": 2.4316074969911223e-07, "loss": 0.035, "num_tokens": 167546955.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6832290291786194, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.042610650007212286, "rewards/wordcountpos_reward/raw_geo/std": 0.03831093437444699, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 987.125, "completions/mean_terminated_length": 987.125, "completions/min_length": 629.0, "completions/min_terminated_length": 629.0, "epoch": 0.7653530706141228, "frac_reward_zero_std": 0.0, "grad_norm": 3.2979565937688022, "kl": 0.01837158203125, "learning_rate": 2.429309728445327e-07, "loss": -0.0368, "num_tokens": 167580261.0, "reward": 0.0, "reward_std": 0.7790560722351074, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09235314832692419, "rewards/wordcountpos_reward/raw_geo/std": 0.14748761138349936, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1223.0, "completions/mean_length": 961.125, "completions/mean_terminated_length": 925.2000732421875, "completions/min_length": 676.0, "completions/min_terminated_length": 676.0, "epoch": 0.7655531106221244, "frac_reward_zero_std": 0.0, "grad_norm": 3.419957369904806, "kl": 0.021728515625, "learning_rate": 2.4270134571820546e-07, "loss": -0.0755, "num_tokens": 167618711.0, "reward": 0.0, "reward_std": 0.8905549645423889, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0790439573718003, "rewards/wordcountpos_reward/raw_geo/std": 0.23831361962764971, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1275.6875, "completions/mean_terminated_length": 1260.7333984375, "completions/min_length": 1072.0, "completions/min_terminated_length": 1072.0, "epoch": 0.7657531506301261, "frac_reward_zero_std": 0.0, "grad_norm": 2.8368658339960695, "kl": 0.0174713134765625, "learning_rate": 2.424718684320973e-07, "loss": 0.0245, "num_tokens": 167669930.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8829705119132996, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05091719145479316, "rewards/wordcountpos_reward/raw_geo/std": 0.06956970375066432, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1067187372905475, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 1062.25, "completions/mean_terminated_length": 1062.25, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.7659531906381276, "frac_reward_zero_std": 0.0, "grad_norm": 2.3788619856822297, "kl": 0.00897216796875, "learning_rate": 2.4224254109810265e-07, "loss": -0.0038, "num_tokens": 167708934.0, "reward": 0.0, "reward_std": 0.3671490252017975, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02664217958974558, "rewards/wordcountpos_reward/raw_geo/std": 0.1077190268315306, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1113.75, "completions/mean_terminated_length": 1088.0, "completions/min_length": 784.0, "completions/min_terminated_length": 784.0, "epoch": 0.7661532306461293, "frac_reward_zero_std": 0.0, "grad_norm": 3.550211019907993, "kl": 0.020233154296875, "learning_rate": 2.4201336382804237e-07, "loss": -0.0418, "num_tokens": 167749690.0, "reward": -7.450580596923828e-09, "reward_std": 1.0325617790222168, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.024735208674805026, "rewards/wordcountpos_reward/raw_geo/std": 0.11648030063689398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1215.25, "completions/mean_terminated_length": 1215.25, "completions/min_length": 1068.0, "completions/min_terminated_length": 1068.0, "epoch": 0.7663532706541308, "frac_reward_zero_std": 0.0, "grad_norm": 2.8929298055376202, "kl": 0.0122833251953125, "learning_rate": 2.4178433673366457e-07, "loss": 0.0149, "num_tokens": 167797750.0, "reward": 0.0, "reward_std": 0.9927129149436951, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11933872174021304, "rewards/wordcountpos_reward/raw_geo/std": 0.08634524223426923, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970786, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1044.375, "completions/mean_terminated_length": 1044.375, "completions/min_length": 772.0, "completions/min_terminated_length": 772.0, "epoch": 0.7665533106621324, "frac_reward_zero_std": 0.0, "grad_norm": 3.1754895004815045, "kl": 0.018463134765625, "learning_rate": 2.415554599266436e-07, "loss": -0.036, "num_tokens": 167830084.0, "reward": 0.0, "reward_std": 0.935714840888977, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.30538404117944284, "rewards/wordcountpos_reward/raw_geo/std": 0.11921327082245502, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1333.0, "completions/max_terminated_length": 1333.0, "completions/mean_length": 1107.9375, "completions/mean_terminated_length": 1107.9375, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.7667533506701341, "frac_reward_zero_std": 0.0, "grad_norm": 1.9875334013974102, "kl": 0.00616455078125, "learning_rate": 2.4132673351858106e-07, "loss": -0.026, "num_tokens": 167872043.0, "reward": 0.0, "reward_std": 0.9830300807952881, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.058499508370905715, "rewards/wordcountpos_reward/raw_geo/std": 0.24601895469876253, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1110.25, "completions/mean_terminated_length": 1110.25, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.7669533906781356, "frac_reward_zero_std": 0.0, "grad_norm": 2.8691205357487175, "kl": 0.0139617919921875, "learning_rate": 2.4109815762100485e-07, "loss": 0.0337, "num_tokens": 167921895.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9442375302314758, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15742609666001153, "rewards/wordcountpos_reward/raw_geo/std": 0.11151584974359112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1123.0, "completions/mean_terminated_length": 1123.0, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.7671534306861372, "frac_reward_zero_std": 0.0, "grad_norm": 3.588678742734059, "kl": 0.020721435546875, "learning_rate": 2.4086973234536984e-07, "loss": 0.0102, "num_tokens": 167974767.0, "reward": 0.0, "reward_std": 0.9722253084182739, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.31873565837305784, "rewards/wordcountpos_reward/raw_geo/std": 0.10158487087471814, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823631, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1432.75, "completions/mean_terminated_length": 1320.666748046875, "completions/min_length": 1184.0, "completions/min_terminated_length": 1184.0, "epoch": 0.7673534706941388, "frac_reward_zero_std": 0.0, "grad_norm": 2.50580641576194, "kl": 0.013275146484375, "learning_rate": 2.406414578030569e-07, "loss": 0.0209, "num_tokens": 168025475.0, "reward": 1.4901161193847656e-08, "reward_std": 1.021147608757019, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.010002124857895377, "rewards/wordcountpos_reward/raw_geo/std": 0.1776693937710082, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1138.9375, "completions/mean_terminated_length": 1114.86669921875, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.7675535107021404, "frac_reward_zero_std": 0.0, "grad_norm": 2.5542200214284048, "kl": 0.0144805908203125, "learning_rate": 2.4041333410537396e-07, "loss": -0.0387, "num_tokens": 168072602.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8754941821098328, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1621607577903669, "rewards/wordcountpos_reward/raw_geo/std": 0.05057095889180813, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 1075.625, "completions/mean_terminated_length": 1075.625, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.7677535507101421, "frac_reward_zero_std": 0.0, "grad_norm": 2.860982629939735, "kl": 0.01544189453125, "learning_rate": 2.401853613635551e-07, "loss": 0.0071, "num_tokens": 168119780.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9984360933303833, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.005331200972058166, "rewards/wordcountpos_reward/raw_geo/std": 0.16068410139545716, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787746, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1346.5, "completions/mean_terminated_length": 1311.0770263671875, "completions/min_length": 1035.0, "completions/min_terminated_length": 1035.0, "epoch": 0.7679535907181436, "frac_reward_zero_std": 0.0, "grad_norm": 2.6349671646930624, "kl": 0.0145111083984375, "learning_rate": 2.39957539688761e-07, "loss": -0.0164, "num_tokens": 168171036.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8287845849990845, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09707117134702666, "rewards/wordcountpos_reward/raw_geo/std": 0.09335132938401561, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1265.125, "completions/mean_terminated_length": 1158.3636474609375, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.7681536307261452, "frac_reward_zero_std": 0.0, "grad_norm": 2.9903246196327973, "kl": 0.017608642578125, "learning_rate": 2.397298691920783e-07, "loss": 0.0286, "num_tokens": 168218742.0, "reward": 0.0, "reward_std": 0.5296944975852966, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15828833035992784, "rewards/wordcountpos_reward/raw_geo/std": 0.23165641714962096, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978232, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1064.625, "completions/mean_terminated_length": 1035.60009765625, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.7683536707341468, "frac_reward_zero_std": 0.0, "grad_norm": 3.2920549968406783, "kl": 0.019012451171875, "learning_rate": 2.3950234998452026e-07, "loss": 0.0395, "num_tokens": 168264672.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8043026924133301, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11403794678217594, "rewards/wordcountpos_reward/raw_geo/std": 0.06954510478269071, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455328, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 964.875, "completions/mean_terminated_length": 964.875, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.7685537107421484, "frac_reward_zero_std": 0.0, "grad_norm": 3.548640408753703, "kl": 0.01776123046875, "learning_rate": 2.392749821770263e-07, "loss": 0.021, "num_tokens": 168299870.0, "reward": -2.2351741790771484e-08, "reward_std": 1.026021957397461, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0043512839844965625, "rewards/wordcountpos_reward/raw_geo/std": 0.07568497591452912, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 972.875, "completions/mean_terminated_length": 972.875, "completions/min_length": 613.0, "completions/min_terminated_length": 613.0, "epoch": 0.7687537507501501, "frac_reward_zero_std": 0.0, "grad_norm": 2.433971759472657, "kl": 0.01418304443359375, "learning_rate": 2.390477658804622e-07, "loss": -0.0669, "num_tokens": 168345108.0, "reward": 0.0, "reward_std": 0.9945483207702637, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05551071745570849, "rewards/wordcountpos_reward/raw_geo/std": 0.01983784065879704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.13601470508735444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1052.0, "completions/max_terminated_length": 1052.0, "completions/mean_length": 930.375, "completions/mean_terminated_length": 930.375, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.7689537907581516, "frac_reward_zero_std": 0.0, "grad_norm": 3.6743466026558114, "kl": 0.0164031982421875, "learning_rate": 2.3882070120561917e-07, "loss": -0.0176, "num_tokens": 168385842.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9471679925918579, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11669064320247066, "rewards/wordcountpos_reward/raw_geo/std": 0.061558120710054114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.12412657816683506, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 1029.3125, "completions/mean_terminated_length": 1029.3125, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.7691538307661532, "frac_reward_zero_std": 0.0, "grad_norm": 3.691493371546259, "kl": 0.02581787109375, "learning_rate": 2.385937882632155e-07, "loss": -0.0039, "num_tokens": 168431015.0, "reward": -1.4901161193847656e-08, "reward_std": 1.061337947845459, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04069464572329051, "rewards/wordcountpos_reward/raw_geo/std": 0.13652866374014674, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 1028.8125, "completions/mean_terminated_length": 1028.8125, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.7693538707741548, "frac_reward_zero_std": 0.0, "grad_norm": 2.4619260666858813, "kl": 0.0190887451171875, "learning_rate": 2.3836702716389463e-07, "loss": 0.0119, "num_tokens": 168467996.0, "reward": 1.862645149230957e-08, "reward_std": 1.0634291172027588, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11987147779405513, "rewards/wordcountpos_reward/raw_geo/std": 0.10474470585528775, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1133.0, "completions/max_terminated_length": 1133.0, "completions/mean_length": 961.625, "completions/mean_terminated_length": 961.625, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.7695539107821564, "frac_reward_zero_std": 0.0, "grad_norm": 3.91645574157724, "kl": 0.024261474609375, "learning_rate": 2.3814041801822653e-07, "loss": -0.0044, "num_tokens": 168518982.0, "reward": -4.470348358154297e-08, "reward_std": 0.9682698249816895, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04571970577414924, "rewards/wordcountpos_reward/raw_geo/std": 0.06183228452205261, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1391.3125, "completions/mean_terminated_length": 1282.625, "completions/min_length": 1129.0, "completions/min_terminated_length": 1129.0, "epoch": 0.7697539507901581, "frac_reward_zero_std": 0.0, "grad_norm": 2.827728564229854, "kl": 0.017730712890625, "learning_rate": 2.3791396093670643e-07, "loss": 0.0157, "num_tokens": 168573555.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8117923736572266, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.029144240873692336, "rewards/wordcountpos_reward/raw_geo/std": 0.1160289469684629, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1417.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1273.875, "completions/mean_terminated_length": 1273.875, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.7699539907981596, "frac_reward_zero_std": 0.0, "grad_norm": 2.54294669759343, "kl": 0.0146484375, "learning_rate": 2.3768765602975645e-07, "loss": 0.0031, "num_tokens": 168622937.0, "reward": 0.0, "reward_std": 0.999935507774353, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19091799545643245, "rewards/wordcountpos_reward/raw_geo/std": 0.1755178313585205, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1136.875, "completions/mean_terminated_length": 1112.666748046875, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.7701540308061612, "frac_reward_zero_std": 0.0, "grad_norm": 3.572864005018517, "kl": 0.02166748046875, "learning_rate": 2.3746150340772342e-07, "loss": 0.001, "num_tokens": 168670783.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5246423482894897, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08023010304780111, "rewards/wordcountpos_reward/raw_geo/std": 0.07009871466235224, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1227.9375, "completions/mean_terminated_length": 1189.071533203125, "completions/min_length": 716.0, "completions/min_terminated_length": 716.0, "epoch": 0.7703540708141629, "frac_reward_zero_std": 0.0, "grad_norm": 2.8592178460303765, "kl": 0.014862060546875, "learning_rate": 2.3723550318088054e-07, "loss": -0.0419, "num_tokens": 168723886.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5151090025901794, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03337698774262525, "rewards/wordcountpos_reward/raw_geo/std": 0.14013899554298018, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901158, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 1112.4375, "completions/mean_terminated_length": 1112.4375, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.7705541108221644, "frac_reward_zero_std": 0.0, "grad_norm": 3.421205583693635, "kl": 0.016693115234375, "learning_rate": 2.3700965545942654e-07, "loss": 0.0131, "num_tokens": 168764533.0, "reward": 0.0, "reward_std": 0.9964163303375244, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04437970027221211, "rewards/wordcountpos_reward/raw_geo/std": 0.05196085168084551, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1102.25, "completions/mean_terminated_length": 1102.25, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.7707541508301661, "frac_reward_zero_std": 0.0, "grad_norm": 3.4581129604976777, "kl": 0.019317626953125, "learning_rate": 2.3678396035348593e-07, "loss": 0.031, "num_tokens": 168807121.0, "reward": 5.960464477539063e-08, "reward_std": 0.7659135460853577, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.016855619306081645, "rewards/wordcountpos_reward/raw_geo/std": 0.06865187108051647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1278.0, "completions/max_terminated_length": 1278.0, "completions/mean_length": 1043.4375, "completions/mean_terminated_length": 1043.4375, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.7709541908381676, "frac_reward_zero_std": 0.0, "grad_norm": 3.6412618374066823, "kl": 0.0174560546875, "learning_rate": 2.365584179731084e-07, "loss": 0.0361, "num_tokens": 168851952.0, "reward": 0.0, "reward_std": 0.8068356513977051, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3024389026628911, "rewards/wordcountpos_reward/raw_geo/std": 0.18666796701533767, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1065.6875, "completions/mean_terminated_length": 1065.6875, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.7711542308461692, "frac_reward_zero_std": 0.0, "grad_norm": 3.5111564723035538, "kl": 0.013671875, "learning_rate": 2.363330284282696e-07, "loss": 0.0198, "num_tokens": 168888795.0, "reward": -3.725290298461914e-09, "reward_std": 1.047977328300476, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1531693728084361, "rewards/wordcountpos_reward/raw_geo/std": 0.09681295477061314, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1218.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 991.0, "completions/mean_terminated_length": 991.0, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.7713542708541709, "frac_reward_zero_std": 0.0, "grad_norm": 2.4372951552010544, "kl": 0.01262664794921875, "learning_rate": 2.3610779182887044e-07, "loss": 0.0019, "num_tokens": 168922003.0, "reward": 0.0, "reward_std": 1.0669184923171997, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07041554421524959, "rewards/wordcountpos_reward/raw_geo/std": 0.06037105492080097, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1220.5, "completions/mean_terminated_length": 1220.5, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.7715543108621724, "frac_reward_zero_std": 0.0, "grad_norm": 2.506490202554591, "kl": 0.0138702392578125, "learning_rate": 2.3588270828473753e-07, "loss": -0.0185, "num_tokens": 168955947.0, "reward": -3.725290298461914e-09, "reward_std": 1.0472694635391235, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0877645580502747, "rewards/wordcountpos_reward/raw_geo/std": 0.06281278029064075, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1216.0625, "completions/mean_terminated_length": 1175.5, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.7717543508701741, "frac_reward_zero_std": 0.0, "grad_norm": 3.4054936144539556, "kl": 0.020904541015625, "learning_rate": 2.3565777790562218e-07, "loss": -0.0135, "num_tokens": 169005700.0, "reward": 2.9802322387695312e-08, "reward_std": 0.798215389251709, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08094525012886776, "rewards/wordcountpos_reward/raw_geo/std": 0.07870716726119449, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1182.0625, "completions/mean_terminated_length": 1136.6429443359375, "completions/min_length": 953.0, "completions/min_terminated_length": 953.0, "epoch": 0.7719543908781756, "frac_reward_zero_std": 0.0, "grad_norm": 3.06389094829168, "kl": 0.014862060546875, "learning_rate": 2.354330008012018e-07, "loss": -0.0677, "num_tokens": 169054189.0, "reward": 0.0, "reward_std": 0.6458553075790405, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061125820975418646, "rewards/wordcountpos_reward/raw_geo/std": 0.16986216861169534, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1310.4375, "completions/mean_terminated_length": 1196.7000732421875, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.7721544308861772, "frac_reward_zero_std": 0.0, "grad_norm": 2.784449745161841, "kl": 0.0137481689453125, "learning_rate": 2.352083770810787e-07, "loss": -0.0109, "num_tokens": 169102532.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0315487384796143, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1424976779934597, "rewards/wordcountpos_reward/raw_geo/std": 0.05940543140065276, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1074.125, "completions/mean_terminated_length": 1074.125, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.7723544708941789, "frac_reward_zero_std": 0.0, "grad_norm": 3.314734527260716, "kl": 0.0153045654296875, "learning_rate": 2.3498390685478042e-07, "loss": -0.0212, "num_tokens": 169142606.0, "reward": 0.0, "reward_std": 1.0439139604568481, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.021239234541160855, "rewards/wordcountpos_reward/raw_geo/std": 0.02857194021111938, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1306.625, "completions/mean_terminated_length": 1113.25, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.7725545109021804, "frac_reward_zero_std": 0.0, "grad_norm": 2.942955088513631, "kl": 0.0143280029296875, "learning_rate": 2.347595902317595e-07, "loss": 0.029, "num_tokens": 169191520.0, "reward": 0.0, "reward_std": 1.0496273040771484, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0021754387677495853, "rewards/wordcountpos_reward/raw_geo/std": 0.09757012346714078, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333331, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1394.0, "completions/mean_length": 1219.125, "completions/mean_terminated_length": 1200.4000244140625, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.7727545509101821, "frac_reward_zero_std": 0.0, "grad_norm": 2.9788942901507216, "kl": 0.014801025390625, "learning_rate": 2.3453542732139388e-07, "loss": 0.027, "num_tokens": 169229602.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9599794745445251, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12156979160755406, "rewards/wordcountpos_reward/raw_geo/std": 0.10630747010036232, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1284.25, "completions/mean_terminated_length": 1284.25, "completions/min_length": 1099.0, "completions/min_terminated_length": 1099.0, "epoch": 0.7729545909181836, "frac_reward_zero_std": 0.0, "grad_norm": 2.1572782843653573, "kl": 0.0111236572265625, "learning_rate": 2.3431141823298657e-07, "loss": -0.0083, "num_tokens": 169280614.0, "reward": 4.0978193283081055e-08, "reward_std": 1.0027799606323242, "rewards/wordcountpos_reward/mean": 4.0978193283081055e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1421666603207767, "rewards/wordcountpos_reward/raw_geo/std": 0.07777957800862315, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0596284793999944, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1270.125, "completions/mean_terminated_length": 1237.2857666015625, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.7731546309261852, "frac_reward_zero_std": 0.0, "grad_norm": 3.067516096012293, "kl": 0.0156097412109375, "learning_rate": 2.3408756307576498e-07, "loss": -0.0462, "num_tokens": 169314664.0, "reward": 2.9802322387695312e-08, "reward_std": 0.545816957950592, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06597390933467646, "rewards/wordcountpos_reward/raw_geo/std": 0.1597501524920784, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1216.75, "completions/mean_terminated_length": 1216.75, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.7733546709341869, "frac_reward_zero_std": 0.0, "grad_norm": 2.5901836543226637, "kl": 0.0126190185546875, "learning_rate": 2.3386386195888228e-07, "loss": -0.0189, "num_tokens": 169352300.0, "reward": 0.0, "reward_std": 0.535529375076294, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.054005184120612566, "rewards/wordcountpos_reward/raw_geo/std": 0.055042002900870815, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1245.1875, "completions/mean_terminated_length": 1228.2000732421875, "completions/min_length": 1041.0, "completions/min_terminated_length": 1041.0, "epoch": 0.7735547109421884, "frac_reward_zero_std": 0.0, "grad_norm": 3.5945376725377867, "kl": 0.015716552734375, "learning_rate": 2.33640314991416e-07, "loss": 0.021, "num_tokens": 169403783.0, "reward": 7.450580596923828e-09, "reward_std": 0.9964346885681152, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.17334235076971802, "rewards/wordcountpos_reward/raw_geo/std": 0.22458380808164044, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0455420034042649, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1283.1875, "completions/mean_terminated_length": 1210.916748046875, "completions/min_length": 969.0, "completions/min_terminated_length": 969.0, "epoch": 0.7737547509501901, "frac_reward_zero_std": 0.0, "grad_norm": 3.2557739100085534, "kl": 0.01702880859375, "learning_rate": 2.3341692228236882e-07, "loss": 0.0102, "num_tokens": 169451162.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0280250310897827, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.043010301476479634, "rewards/wordcountpos_reward/raw_geo/std": 0.0500494601866915, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1268.75, "completions/mean_terminated_length": 1215.3846435546875, "completions/min_length": 1062.0, "completions/min_terminated_length": 1062.0, "epoch": 0.7739547909581916, "frac_reward_zero_std": 0.0, "grad_norm": 3.228948054980583, "kl": 0.018585205078125, "learning_rate": 2.3319368394066763e-07, "loss": -0.0221, "num_tokens": 169500110.0, "reward": 0.0, "reward_std": 0.9968574047088623, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09513167638996807, "rewards/wordcountpos_reward/raw_geo/std": 0.15711450682540748, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1240.0, "completions/max_terminated_length": 1240.0, "completions/mean_length": 1098.625, "completions/mean_terminated_length": 1098.625, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.7741548309661932, "frac_reward_zero_std": 0.0, "grad_norm": 2.058890928399543, "kl": 0.00832366943359375, "learning_rate": 2.3297060007516502e-07, "loss": -0.0066, "num_tokens": 169538984.0, "reward": 0.0, "reward_std": 0.9130655527114868, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.068447148372389, "rewards/wordcountpos_reward/raw_geo/std": 0.07834774793047171, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1138.9375, "completions/mean_terminated_length": 1114.86669921875, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.7743548709741949, "frac_reward_zero_std": 0.0, "grad_norm": 2.577975011804696, "kl": 0.0126800537109375, "learning_rate": 2.3274767079463722e-07, "loss": -0.023, "num_tokens": 169577303.0, "reward": 0.0, "reward_std": 0.9473124742507935, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12266066214706473, "rewards/wordcountpos_reward/raw_geo/std": 0.07736713795296143, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1288.875, "completions/mean_terminated_length": 1192.9091796875, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.7745549109821964, "frac_reward_zero_std": 0.0, "grad_norm": 3.0182694138274004, "kl": 0.0189056396484375, "learning_rate": 2.32524896207786e-07, "loss": 0.0002, "num_tokens": 169627845.0, "reward": 0.0, "reward_std": 0.9717332720756531, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02714112980432299, "rewards/wordcountpos_reward/raw_geo/std": 0.15619563262068828, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1273.75, "completions/mean_terminated_length": 1198.3333740234375, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.7747549509901981, "frac_reward_zero_std": 0.0, "grad_norm": 3.016529607453828, "kl": 0.0155181884765625, "learning_rate": 2.3230227642323664e-07, "loss": 0.0055, "num_tokens": 169681601.0, "reward": 0.0, "reward_std": 0.39681485295295715, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.048775532047555366, "rewards/wordcountpos_reward/raw_geo/std": 0.06353550217560912, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.16049691355057039, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 946.0, "completions/mean_length": 839.0, "completions/mean_terminated_length": 794.933349609375, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.7749549909981996, "frac_reward_zero_std": 0.0, "grad_norm": 2.352478692998981, "kl": 0.0090789794921875, "learning_rate": 2.3207981154954023e-07, "loss": -0.062, "num_tokens": 169722345.0, "reward": 0.0, "reward_std": 0.9767192602157593, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08199884302256252, "rewards/wordcountpos_reward/raw_geo/std": 0.1041184192854649, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1593970119149271, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1319.8125, "completions/mean_terminated_length": 1088.1429443359375, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.7751550310062012, "frac_reward_zero_std": 0.0, "grad_norm": 2.8635103195410947, "kl": 0.011322021484375, "learning_rate": 2.3185750169517127e-07, "loss": -0.048, "num_tokens": 169768598.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8331983685493469, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19369064705004058, "rewards/wordcountpos_reward/raw_geo/std": 0.24567022618374307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901158, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1352.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1035.6875, "completions/mean_terminated_length": 1035.6875, "completions/min_length": 809.0, "completions/min_terminated_length": 809.0, "epoch": 0.7753550710142029, "frac_reward_zero_std": 0.0, "grad_norm": 3.5812052342512373, "kl": 0.0162200927734375, "learning_rate": 2.3163534696852908e-07, "loss": 0.0058, "num_tokens": 169812209.0, "reward": 0.0, "reward_std": 0.8370190262794495, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11039824515710148, "rewards/wordcountpos_reward/raw_geo/std": 0.17697657251988036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1288.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 1069.3125, "completions/mean_terminated_length": 1069.3125, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.7755551110222044, "frac_reward_zero_std": 0.0, "grad_norm": 3.969933804549233, "kl": 0.02191162109375, "learning_rate": 2.314133474779374e-07, "loss": -0.0527, "num_tokens": 169853374.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7378475666046143, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06209966938449468, "rewards/wordcountpos_reward/raw_geo/std": 0.11635067315026648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1266.5625, "completions/mean_terminated_length": 1233.21435546875, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.7757551510302061, "frac_reward_zero_std": 0.0, "grad_norm": 3.287053386935251, "kl": 0.019256591796875, "learning_rate": 2.311915033316443e-07, "loss": -0.063, "num_tokens": 169905087.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9612833261489868, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19834138429059078, "rewards/wordcountpos_reward/raw_geo/std": 0.37838592800070237, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.1002773930432755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1024.25, "completions/mean_terminated_length": 1024.25, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.7759551910382076, "frac_reward_zero_std": 0.0, "grad_norm": 3.663989072718065, "kl": 0.01873779296875, "learning_rate": 2.309698146378217e-07, "loss": -0.0603, "num_tokens": 169952499.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7962995767593384, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03436543754123051, "rewards/wordcountpos_reward/raw_geo/std": 0.16734019249260704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1641476300299351, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1458.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1200.0625, "completions/mean_terminated_length": 1200.0625, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.7761552310462092, "frac_reward_zero_std": 0.0, "grad_norm": 3.0228306916260133, "kl": 0.015472412109375, "learning_rate": 2.3074828150456615e-07, "loss": 0.0144, "num_tokens": 169992684.0, "reward": 0.0, "reward_std": 0.8990752696990967, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006905573772100754, "rewards/wordcountpos_reward/raw_geo/std": 0.048657075200210245, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1103.0, "completions/max_terminated_length": 1103.0, "completions/mean_length": 1013.9375, "completions/mean_terminated_length": 1013.9375, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.7763552710542109, "frac_reward_zero_std": 0.0, "grad_norm": 2.235656827804275, "kl": 0.0089263916015625, "learning_rate": 2.3052690403989828e-07, "loss": -0.014, "num_tokens": 170041059.0, "reward": -5.960464477539063e-08, "reward_std": 0.8724918365478516, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01952821338972678, "rewards/wordcountpos_reward/raw_geo/std": 0.12003691605677773, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476839, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 1097.5, "completions/mean_terminated_length": 1097.5, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.7765553110622124, "frac_reward_zero_std": 0.0, "grad_norm": 3.1971816157228137, "kl": 0.0182647705078125, "learning_rate": 2.3030568235176284e-07, "loss": -0.0391, "num_tokens": 170087251.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0047129392623901, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10832683278756117, "rewards/wordcountpos_reward/raw_geo/std": 0.24510497966182102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 1017.9375, "completions/mean_terminated_length": 1017.9375, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.7767553510702141, "frac_reward_zero_std": 0.0, "grad_norm": 3.388334569521217, "kl": 0.0213470458984375, "learning_rate": 2.3008461654802817e-07, "loss": -0.0691, "num_tokens": 170140018.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9617948532104492, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07319451472747743, "rewards/wordcountpos_reward/raw_geo/std": 0.06590037237715839, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1210.0, "completions/mean_terminated_length": 1190.666748046875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.7769553910782157, "frac_reward_zero_std": 0.0, "grad_norm": 3.4456490522711514, "kl": 0.018402099609375, "learning_rate": 2.2986370673648722e-07, "loss": -0.0517, "num_tokens": 170178194.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0581620931625366, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03988347977620457, "rewards/wordcountpos_reward/raw_geo/std": 0.049150860641286646, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 980.0625, "completions/mean_terminated_length": 980.0625, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.7771554310862172, "frac_reward_zero_std": 0.0, "grad_norm": 3.178690128705259, "kl": 0.01763916015625, "learning_rate": 2.296429530248566e-07, "loss": -0.0387, "num_tokens": 170218907.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7277988195419312, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16216426118213617, "rewards/wordcountpos_reward/raw_geo/std": 0.18683733208278203, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 1174.5, "completions/mean_terminated_length": 1174.5, "completions/min_length": 1061.0, "completions/min_terminated_length": 1061.0, "epoch": 0.7773554710942189, "frac_reward_zero_std": 0.0, "grad_norm": 2.707483085539493, "kl": 0.0127410888671875, "learning_rate": 2.2942235552077692e-07, "loss": -0.0154, "num_tokens": 170264651.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9940789937973022, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16232584160556102, "rewards/wordcountpos_reward/raw_geo/std": 0.05536679185973353, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1289.625, "completions/mean_terminated_length": 1275.60009765625, "completions/min_length": 1039.0, "completions/min_terminated_length": 1039.0, "epoch": 0.7775555111022204, "frac_reward_zero_std": 0.0, "grad_norm": 3.336340226638322, "kl": 0.0196533203125, "learning_rate": 2.2920191433181226e-07, "loss": 0.0194, "num_tokens": 170309245.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7691245079040527, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.25116457826757466, "rewards/wordcountpos_reward/raw_geo/std": 0.08594361488633803, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11147163731607214, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1234.1875, "completions/mean_terminated_length": 1172.84619140625, "completions/min_length": 1035.0, "completions/min_terminated_length": 1035.0, "epoch": 0.7777555511102221, "frac_reward_zero_std": 0.0, "grad_norm": 3.064937208578514, "kl": 0.016204833984375, "learning_rate": 2.2898162956545088e-07, "loss": -0.0296, "num_tokens": 170352560.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0419869422912598, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03545804611708971, "rewards/wordcountpos_reward/raw_geo/std": 0.09079570125726771, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.1134476547592341, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1078.0, "completions/max_terminated_length": 1078.0, "completions/mean_length": 893.5, "completions/mean_terminated_length": 893.5, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.7779555911182237, "frac_reward_zero_std": 0.0, "grad_norm": 3.2271966039209734, "kl": 0.014739990234375, "learning_rate": 2.2876150132910466e-07, "loss": 0.0143, "num_tokens": 170401056.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4466981589794159, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03849856003761735, "rewards/wordcountpos_reward/raw_geo/std": 0.05709128119296295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1295.5, "completions/mean_terminated_length": 1281.86669921875, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.7781556311262252, "frac_reward_zero_std": 0.0, "grad_norm": 2.350434626668018, "kl": 0.00835418701171875, "learning_rate": 2.285415297301092e-07, "loss": -0.0058, "num_tokens": 170445912.0, "reward": -5.960464477539063e-08, "reward_std": 0.7795599699020386, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01696876118460403, "rewards/wordcountpos_reward/raw_geo/std": 0.1453373837347326, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1102.8125, "completions/mean_terminated_length": 1102.8125, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.7783556711342269, "frac_reward_zero_std": 0.0, "grad_norm": 3.5071043438283898, "kl": 0.01776123046875, "learning_rate": 2.283217148757233e-07, "loss": 0.0158, "num_tokens": 170493213.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6501206159591675, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04672253542702395, "rewards/wordcountpos_reward/raw_geo/std": 0.057501033572056506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1089.875, "completions/mean_terminated_length": 1089.875, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.7785557111422284, "frac_reward_zero_std": 0.0, "grad_norm": 2.605375740057792, "kl": 0.01067352294921875, "learning_rate": 2.281020568731302e-07, "loss": -0.0231, "num_tokens": 170539827.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0549794435501099, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016727447114159785, "rewards/wordcountpos_reward/raw_geo/std": 0.11457302813259372, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.12881223774390613, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1254.8125, "completions/mean_terminated_length": 1238.4666748046875, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.77875575115023, "frac_reward_zero_std": 0.0, "grad_norm": 2.74872159988953, "kl": 0.0156097412109375, "learning_rate": 2.278825558294357e-07, "loss": -0.0441, "num_tokens": 170582640.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9757705926895142, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17672188138505834, "rewards/wordcountpos_reward/raw_geo/std": 0.22642951131794373, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 1015.625, "completions/mean_terminated_length": 983.3333740234375, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.7789557911582317, "frac_reward_zero_std": 0.0, "grad_norm": 2.57856664299626, "kl": 0.0122833251953125, "learning_rate": 2.276632118516699e-07, "loss": 0.0427, "num_tokens": 170617482.0, "reward": 5.960464477539063e-08, "reward_std": 0.7504956722259521, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.003455528297476072, "rewards/wordcountpos_reward/raw_geo/std": 0.03806121500570601, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1284.1875, "completions/mean_terminated_length": 1269.800048828125, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.7791558311662332, "frac_reward_zero_std": 0.0, "grad_norm": 2.5255208799639575, "kl": 0.0139312744140625, "learning_rate": 2.274440250467854e-07, "loss": -0.0165, "num_tokens": 170653925.0, "reward": -1.4901161193847656e-08, "reward_std": 1.053844928741455, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13905402220393143, "rewards/wordcountpos_reward/raw_geo/std": 0.07711997960933611, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1129.875, "completions/mean_terminated_length": 1129.875, "completions/min_length": 784.0, "completions/min_terminated_length": 784.0, "epoch": 0.7793558711742349, "frac_reward_zero_std": 0.0, "grad_norm": 3.0580283872390974, "kl": 0.0142669677734375, "learning_rate": 2.272249955216593e-07, "loss": -0.0147, "num_tokens": 170700395.0, "reward": 0.0, "reward_std": 0.9303553104400635, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.19164140634382704, "rewards/wordcountpos_reward/raw_geo/std": 0.225905804649451, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1354690069789096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 1048.3125, "completions/mean_terminated_length": 1018.2000732421875, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.7795559111822364, "frac_reward_zero_std": 0.0, "grad_norm": 3.4609096190894246, "kl": 0.018646240234375, "learning_rate": 2.2700612338309086e-07, "loss": 0.0173, "num_tokens": 170737256.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5702844262123108, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11746017066917455, "rewards/wordcountpos_reward/raw_geo/std": 0.181259003756066, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1105.6875, "completions/mean_terminated_length": 1105.6875, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.779755951190238, "frac_reward_zero_std": 0.0, "grad_norm": 3.6843036047701743, "kl": 0.02313232421875, "learning_rate": 2.267874087378034e-07, "loss": 0.0489, "num_tokens": 170789171.0, "reward": 0.0, "reward_std": 0.7244072556495667, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01591655008294467, "rewards/wordcountpos_reward/raw_geo/std": 0.11149028427275019, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1171.9375, "completions/mean_terminated_length": 1022.8182373046875, "completions/min_length": 613.0, "completions/min_terminated_length": 613.0, "epoch": 0.7799559911982397, "frac_reward_zero_std": 0.0, "grad_norm": 3.0935831115002026, "kl": 0.0216064453125, "learning_rate": 2.2656885169244304e-07, "loss": -0.0382, "num_tokens": 170846402.0, "reward": 0.0, "reward_std": 0.7666501998901367, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.019154874458441627, "rewards/wordcountpos_reward/raw_geo/std": 0.04795629213717553, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125754, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1137.3125, "completions/mean_terminated_length": 1085.5, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.7801560312062412, "frac_reward_zero_std": 0.0, "grad_norm": 2.6636253792637805, "kl": 0.0116424560546875, "learning_rate": 2.263504523535795e-07, "loss": 0.0067, "num_tokens": 170886223.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8144294619560242, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -5.191491543483093e-05, "rewards/wordcountpos_reward/raw_geo/std": 0.06739855793282819, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1331.0, "completions/max_terminated_length": 1331.0, "completions/mean_length": 1203.0, "completions/mean_terminated_length": 1203.0, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.7803560712142429, "frac_reward_zero_std": 0.0, "grad_norm": 3.3327433088430958, "kl": 0.018341064453125, "learning_rate": 2.2613221082770487e-07, "loss": -0.0244, "num_tokens": 170927599.0, "reward": 1.862645149230957e-08, "reward_std": 1.0647945404052734, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02267368725993745, "rewards/wordcountpos_reward/raw_geo/std": 0.06775511382075528, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1169.375, "completions/mean_terminated_length": 1122.1429443359375, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.7805561112222444, "frac_reward_zero_std": 0.0, "grad_norm": 2.6106125601435743, "kl": 0.0137786865234375, "learning_rate": 2.259141272212349e-07, "loss": -0.0009, "num_tokens": 170969949.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9728251695632935, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23756368050532958, "rewards/wordcountpos_reward/raw_geo/std": 0.1965490908176176, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1023.6875, "completions/mean_terminated_length": 1023.6875, "completions/min_length": 588.0, "completions/min_terminated_length": 588.0, "epoch": 0.780756151230246, "frac_reward_zero_std": 0.0, "grad_norm": 3.287705169566378, "kl": 0.0150909423828125, "learning_rate": 2.2569620164050813e-07, "loss": 0.0231, "num_tokens": 171015928.0, "reward": 0.0, "reward_std": 0.7631747126579285, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.024121725935805964, "rewards/wordcountpos_reward/raw_geo/std": 0.26971675540876505, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1209.25, "completions/mean_terminated_length": 1142.1539306640625, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.7809561912382477, "frac_reward_zero_std": 0.0, "grad_norm": 3.3649393620526626, "kl": 0.02166748046875, "learning_rate": 2.2547843419178622e-07, "loss": -0.0515, "num_tokens": 171059372.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9154149293899536, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22546236622760313, "rewards/wordcountpos_reward/raw_geo/std": 0.3455484995198136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1352.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1132.0625, "completions/mean_terminated_length": 1132.0625, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.7811562312462492, "frac_reward_zero_std": 0.0, "grad_norm": 2.6685200023456885, "kl": 0.0151214599609375, "learning_rate": 2.2526082498125325e-07, "loss": 0.0189, "num_tokens": 171106021.0, "reward": 0.0, "reward_std": 0.8813483715057373, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12865452380614867, "rewards/wordcountpos_reward/raw_geo/std": 0.0627503309506909, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1204.0, "completions/max_terminated_length": 1204.0, "completions/mean_length": 952.25, "completions/mean_terminated_length": 952.25, "completions/min_length": 655.0, "completions/min_terminated_length": 655.0, "epoch": 0.7813562712542509, "frac_reward_zero_std": 0.0, "grad_norm": 3.1933117175721946, "kl": 0.02716064453125, "learning_rate": 2.2504337411501664e-07, "loss": -0.0156, "num_tokens": 171150881.0, "reward": 0.0, "reward_std": 0.9092789888381958, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06258975983559045, "rewards/wordcountpos_reward/raw_geo/std": 0.20755434464734332, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1008.25, "completions/mean_terminated_length": 1008.25, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.7815563112622524, "frac_reward_zero_std": 0.0, "grad_norm": 3.377708851829283, "kl": 0.01824951171875, "learning_rate": 2.2482608169910643e-07, "loss": -0.0192, "num_tokens": 171193829.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8621907234191895, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0005881725044206837, "rewards/wordcountpos_reward/raw_geo/std": 0.12324048898918448, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1120.875, "completions/mean_terminated_length": 1095.60009765625, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.781756351270254, "frac_reward_zero_std": 0.0, "grad_norm": 3.7049315629845174, "kl": 0.0171051025390625, "learning_rate": 2.2460894783947547e-07, "loss": -0.0719, "num_tokens": 171230059.0, "reward": 0.0, "reward_std": 0.830165684223175, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09699354709664475, "rewards/wordcountpos_reward/raw_geo/std": 0.030291406339990876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1333.125, "completions/mean_terminated_length": 1257.272705078125, "completions/min_length": 954.0, "completions/min_terminated_length": 954.0, "epoch": 0.7819563912782557, "frac_reward_zero_std": 0.0, "grad_norm": 2.7475343089916495, "kl": 0.013824462890625, "learning_rate": 2.2439197264199906e-07, "loss": -0.0013, "num_tokens": 171278277.0, "reward": -1.4901161193847656e-08, "reward_std": 1.010428547859192, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00013972723957510636, "rewards/wordcountpos_reward/raw_geo/std": 0.07141201689586076, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.077817450199525, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1152.75, "completions/mean_terminated_length": 1129.60009765625, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.7821564312862572, "frac_reward_zero_std": 0.0, "grad_norm": 3.2217132873308163, "kl": 0.0167999267578125, "learning_rate": 2.241751562124753e-07, "loss": 0.0219, "num_tokens": 171329249.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5722917318344116, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13996513330841226, "rewards/wordcountpos_reward/raw_geo/std": 0.7342193304421727, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1276.0625, "completions/mean_terminated_length": 1174.272705078125, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.7823564712942589, "frac_reward_zero_std": 0.0, "grad_norm": 3.247124559113396, "kl": 0.018585205078125, "learning_rate": 2.2395849865662487e-07, "loss": -0.0108, "num_tokens": 171381594.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9297438263893127, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.27395590579676915, "rewards/wordcountpos_reward/raw_geo/std": 0.16292723875061957, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 997.4375, "completions/mean_terminated_length": 997.4375, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.7825565113022604, "frac_reward_zero_std": 0.0, "grad_norm": 3.152193514801626, "kl": 0.0164642333984375, "learning_rate": 2.237420000800912e-07, "loss": 0.0247, "num_tokens": 171430905.0, "reward": -5.960464477539063e-08, "reward_std": 0.8500829935073853, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24658203858732267, "rewards/wordcountpos_reward/raw_geo/std": 0.17420158596388421, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1118.125, "completions/mean_terminated_length": 1118.125, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.782756551310262, "frac_reward_zero_std": 0.0, "grad_norm": 2.6798667768847557, "kl": 0.0115203857421875, "learning_rate": 2.2352566058843948e-07, "loss": -0.0151, "num_tokens": 171467763.0, "reward": 0.0, "reward_std": 0.6751932501792908, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1698472382627651, "rewards/wordcountpos_reward/raw_geo/std": 0.04732071763828478, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1274.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 1020.75, "completions/mean_terminated_length": 1020.75, "completions/min_length": 841.0, "completions/min_terminated_length": 841.0, "epoch": 0.7829565913182637, "frac_reward_zero_std": 0.0, "grad_norm": 3.820565010797347, "kl": 0.018341064453125, "learning_rate": 2.2330948028715846e-07, "loss": 0.0118, "num_tokens": 171510031.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8424680829048157, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07694331508168396, "rewards/wordcountpos_reward/raw_geo/std": 0.08347066526168855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12041594578792297, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 1153.5, "completions/mean_terminated_length": 1153.5, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.7831566313262652, "frac_reward_zero_std": 0.0, "grad_norm": 2.7975503967980493, "kl": 0.013519287109375, "learning_rate": 2.2309345928165814e-07, "loss": 0.0033, "num_tokens": 171555911.0, "reward": -2.9802322387695312e-08, "reward_std": 0.871249794960022, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0702347699445915, "rewards/wordcountpos_reward/raw_geo/std": 0.091856255557075, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1032.0, "completions/mean_terminated_length": 1032.0, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.7833566713342669, "frac_reward_zero_std": 0.0, "grad_norm": 2.9270190189570613, "kl": 0.0165252685546875, "learning_rate": 2.2287759767727175e-07, "loss": -0.0254, "num_tokens": 171599495.0, "reward": 0.0, "reward_std": 0.8516414165496826, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04675275964958439, "rewards/wordcountpos_reward/raw_geo/std": 0.038495601567186725, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1211.9375, "completions/mean_terminated_length": 1211.9375, "completions/min_length": 1082.0, "completions/min_terminated_length": 1082.0, "epoch": 0.7835567113422685, "frac_reward_zero_std": 0.0, "grad_norm": 2.483860903644977, "kl": 0.011383056640625, "learning_rate": 2.2266189557925396e-07, "loss": 0.0213, "num_tokens": 171634238.0, "reward": 0.0, "reward_std": 0.8236284852027893, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08308087832099899, "rewards/wordcountpos_reward/raw_geo/std": 0.2558987640753461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1131.1875, "completions/mean_terminated_length": 1078.5, "completions/min_length": 728.0, "completions/min_terminated_length": 728.0, "epoch": 0.78375675135027, "frac_reward_zero_std": 0.0, "grad_norm": 3.3682733474435036, "kl": 0.0176544189453125, "learning_rate": 2.224463530927823e-07, "loss": -0.0689, "num_tokens": 171680337.0, "reward": 0.0, "reward_std": 0.9116764068603516, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.050192307888021044, "rewards/wordcountpos_reward/raw_geo/std": 0.056441046990197925, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.1538999193800477, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1381.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1158.75, "completions/mean_terminated_length": 1158.75, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.7839567913582717, "frac_reward_zero_std": 0.0, "grad_norm": 2.5966701609892278, "kl": 0.0121002197265625, "learning_rate": 2.2223097032295643e-07, "loss": -0.0041, "num_tokens": 171712381.0, "reward": 0.0, "reward_std": 0.9058694243431091, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.00915860247481724, "rewards/wordcountpos_reward/raw_geo/std": 0.04807363209990852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1263.0625, "completions/mean_terminated_length": 1155.3636474609375, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.7841568313662732, "frac_reward_zero_std": 0.0, "grad_norm": 3.3470918354146506, "kl": 0.019256591796875, "learning_rate": 2.220157473747975e-07, "loss": 0.0089, "num_tokens": 171761294.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8949859738349915, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06217956646349125, "rewards/wordcountpos_reward/raw_geo/std": 0.13942622347592107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 984.125, "completions/mean_terminated_length": 984.125, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.7843568713742749, "frac_reward_zero_std": 0.0, "grad_norm": 3.5346378053023595, "kl": 0.0205078125, "learning_rate": 2.2180068435324972e-07, "loss": -0.0445, "num_tokens": 171807712.0, "reward": -2.9802322387695312e-08, "reward_std": 0.828606367111206, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15353644055785176, "rewards/wordcountpos_reward/raw_geo/std": 0.13491499331943418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1204.75, "completions/mean_terminated_length": 1204.75, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.7845569113822765, "frac_reward_zero_std": 0.0, "grad_norm": 2.975495374043415, "kl": 0.0211181640625, "learning_rate": 2.2158578136317851e-07, "loss": 0.0125, "num_tokens": 171858476.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9634436368942261, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0261687950897763, "rewards/wordcountpos_reward/raw_geo/std": 0.08624775627840747, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1152.875, "completions/mean_terminated_length": 1129.7333984375, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.784756951390278, "frac_reward_zero_std": 0.0, "grad_norm": 3.4936492543978033, "kl": 0.020751953125, "learning_rate": 2.2137103850937174e-07, "loss": -0.0002, "num_tokens": 171908890.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7755042314529419, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.007584422910032879, "rewards/wordcountpos_reward/raw_geo/std": 0.08179057675258068, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1253.9375, "completions/mean_terminated_length": 1218.7857666015625, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.7849569913982797, "frac_reward_zero_std": 0.0, "grad_norm": 2.7689566592319044, "kl": 0.013153076171875, "learning_rate": 2.211564558965386e-07, "loss": -0.0051, "num_tokens": 171959681.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9615311026573181, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.056503335496903465, "rewards/wordcountpos_reward/raw_geo/std": 0.13622639129810415, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1122.0, "completions/mean_length": 1058.375, "completions/mean_terminated_length": 1028.933349609375, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.7851570314062812, "frac_reward_zero_std": 0.0, "grad_norm": 3.604153300999687, "kl": 0.01971435546875, "learning_rate": 2.2094203362931102e-07, "loss": -0.0138, "num_tokens": 172000031.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9325066804885864, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17864195893163157, "rewards/wordcountpos_reward/raw_geo/std": 0.13758415704473265, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1381.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1088.125, "completions/mean_terminated_length": 1088.125, "completions/min_length": 855.0, "completions/min_terminated_length": 855.0, "epoch": 0.7853570714142829, "frac_reward_zero_std": 0.0, "grad_norm": 3.3052838258963892, "kl": 0.02032470703125, "learning_rate": 2.2072777181224195e-07, "loss": -0.0054, "num_tokens": 172042425.0, "reward": 0.0, "reward_std": 0.9190503358840942, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08067628072634617, "rewards/wordcountpos_reward/raw_geo/std": 0.07995541483706473, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1011.875, "completions/mean_terminated_length": 979.3333740234375, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.7855571114222845, "frac_reward_zero_std": 0.0, "grad_norm": 2.894057964668273, "kl": 0.0125579833984375, "learning_rate": 2.2051367054980658e-07, "loss": -0.0346, "num_tokens": 172083447.0, "reward": 0.0, "reward_std": 0.6252776384353638, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09759916805895145, "rewards/wordcountpos_reward/raw_geo/std": 0.05699472797165852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1043.9375, "completions/mean_terminated_length": 938.6923217773438, "completions/min_length": 678.0, "completions/min_terminated_length": 678.0, "epoch": 0.785757151430286, "frac_reward_zero_std": 0.0, "grad_norm": 3.517083412686211, "kl": 0.0150604248046875, "learning_rate": 2.2029972994640156e-07, "loss": -0.0033, "num_tokens": 172123710.0, "reward": 0.0, "reward_std": 0.5482903718948364, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0031229303255641203, "rewards/wordcountpos_reward/raw_geo/std": 0.16617832781054306, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970786, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 907.9375, "completions/mean_terminated_length": 907.9375, "completions/min_length": 646.0, "completions/min_terminated_length": 646.0, "epoch": 0.7859571914382877, "frac_reward_zero_std": 0.0, "grad_norm": 3.9468134811106053, "kl": 0.019256591796875, "learning_rate": 2.200859501063455e-07, "loss": -0.0158, "num_tokens": 172155389.0, "reward": 0.0, "reward_std": 1.011676549911499, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05281965794752852, "rewards/wordcountpos_reward/raw_geo/std": 0.07603304579994395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1311.25, "completions/mean_terminated_length": 1267.6923828125, "completions/min_length": 1076.0, "completions/min_terminated_length": 1076.0, "epoch": 0.7861572314462892, "frac_reward_zero_std": 0.0, "grad_norm": 2.543437196947759, "kl": 0.014404296875, "learning_rate": 2.1987233113387814e-07, "loss": 0.0223, "num_tokens": 172202345.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9672567844390869, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10760051901951269, "rewards/wordcountpos_reward/raw_geo/std": 0.09883889472449324, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1102.0, "completions/mean_terminated_length": 1075.4666748046875, "completions/min_length": 605.0, "completions/min_terminated_length": 605.0, "epoch": 0.7863572714542909, "frac_reward_zero_std": 0.0, "grad_norm": 3.447540350842473, "kl": 0.024871826171875, "learning_rate": 2.1965887313316127e-07, "loss": -0.0502, "num_tokens": 172250577.0, "reward": 5.960464477539063e-08, "reward_std": 0.846444308757782, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10489969900939064, "rewards/wordcountpos_reward/raw_geo/std": 0.05490018588917264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.15389991938004774, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1382.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1122.625, "completions/mean_terminated_length": 1122.625, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.7865573114622925, "frac_reward_zero_std": 0.0, "grad_norm": 3.3691854134231436, "kl": 0.0167236328125, "learning_rate": 2.194455762082778e-07, "loss": 0.0085, "num_tokens": 172286763.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7755496501922607, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009054272459305953, "rewards/wordcountpos_reward/raw_geo/std": 0.07545485873451561, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1167.125, "completions/mean_terminated_length": 1144.933349609375, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.786757351470294, "frac_reward_zero_std": 0.0, "grad_norm": 3.292074125507542, "kl": 0.0178985595703125, "learning_rate": 2.1923244046323263e-07, "loss": -0.0291, "num_tokens": 172326317.0, "reward": -7.450580596923828e-09, "reward_std": 1.0647367238998413, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.05521890562171824, "rewards/wordcountpos_reward/raw_geo/std": 0.07545038286720662, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1119.8125, "completions/mean_terminated_length": 1119.8125, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.7869573914782957, "frac_reward_zero_std": 0.0, "grad_norm": 3.3695334018170744, "kl": 0.01361083984375, "learning_rate": 2.1901946600195125e-07, "loss": -0.0171, "num_tokens": 172362986.0, "reward": -7.450580596923828e-09, "reward_std": 1.0495967864990234, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.23013184282318044, "rewards/wordcountpos_reward/raw_geo/std": 0.2126263007112368, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1295.5, "completions/mean_terminated_length": 1266.2857666015625, "completions/min_length": 1046.0, "completions/min_terminated_length": 1046.0, "epoch": 0.7871574314862972, "frac_reward_zero_std": 0.0, "grad_norm": 2.695774559262394, "kl": 0.0140228271484375, "learning_rate": 2.1880665292828124e-07, "loss": 0.0002, "num_tokens": 172406114.0, "reward": -2.9802322387695312e-08, "reward_std": 0.4214799106121063, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.031587404477049844, "rewards/wordcountpos_reward/raw_geo/std": 0.08392263448389065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1482.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1128.875, "completions/mean_terminated_length": 1128.875, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.7873574714942989, "frac_reward_zero_std": 0.0, "grad_norm": 3.2661961812843474, "kl": 0.01806640625, "learning_rate": 2.1859400134599116e-07, "loss": 0.0093, "num_tokens": 172449032.0, "reward": 0.0, "reward_std": 0.6335344314575195, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027796247885653706, "rewards/wordcountpos_reward/raw_geo/std": 0.12554757093788227, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1412.6875, "completions/mean_terminated_length": 1325.375, "completions/min_length": 1091.0, "completions/min_terminated_length": 1091.0, "epoch": 0.7875575115023005, "frac_reward_zero_std": 0.0, "grad_norm": 2.7595832877643076, "kl": 0.0155029296875, "learning_rate": 2.1838151135877108e-07, "loss": 0.0177, "num_tokens": 172502819.0, "reward": 0.0, "reward_std": 0.8411679267883301, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05674405765603819, "rewards/wordcountpos_reward/raw_geo/std": 0.06849263030888589, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1206.1875, "completions/mean_terminated_length": 1186.60009765625, "completions/min_length": 961.0, "completions/min_terminated_length": 961.0, "epoch": 0.787757551510302, "frac_reward_zero_std": 0.0, "grad_norm": 1.7473397369054944, "kl": 0.00708770751953125, "learning_rate": 2.1816918307023174e-07, "loss": -0.0045, "num_tokens": 172545238.0, "reward": -7.450580596923828e-09, "reward_std": 1.0340080261230469, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.06040682890090474, "rewards/wordcountpos_reward/raw_geo/std": 0.10493328513202874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1151.3125, "completions/mean_terminated_length": 1151.3125, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.7879575915183037, "frac_reward_zero_std": 0.0, "grad_norm": 2.728359173642178, "kl": 0.018218994140625, "learning_rate": 2.1795701658390553e-07, "loss": 0.013, "num_tokens": 172588275.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8318783044815063, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.014556252497624888, "rewards/wordcountpos_reward/raw_geo/std": 0.05628823846796196, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1411.5625, "completions/mean_terminated_length": 1391.1539306640625, "completions/min_length": 1295.0, "completions/min_terminated_length": 1295.0, "epoch": 0.7881576315263052, "frac_reward_zero_std": 0.0, "grad_norm": 2.261617970856637, "kl": 0.010955810546875, "learning_rate": 2.1774501200324584e-07, "loss": 0.0121, "num_tokens": 172643772.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0632352828979492, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09141800959896762, "rewards/wordcountpos_reward/raw_geo/std": 0.10135340066215656, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1095.5625, "completions/mean_terminated_length": 960.75, "completions/min_length": 684.0, "completions/min_terminated_length": 684.0, "epoch": 0.7883576715343069, "frac_reward_zero_std": 0.0, "grad_norm": 3.043941341148776, "kl": 0.013153076171875, "learning_rate": 2.1753316943162718e-07, "loss": 0.0015, "num_tokens": 172697181.0, "reward": 0.0, "reward_std": 0.8775697946548462, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.24109076319105213, "rewards/wordcountpos_reward/raw_geo/std": 0.1339010099755986, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1093.6875, "completions/mean_terminated_length": 1066.60009765625, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.7885577115423085, "frac_reward_zero_std": 0.0, "grad_norm": 3.4665535720066556, "kl": 0.01861572265625, "learning_rate": 2.1732148897234452e-07, "loss": 0.0408, "num_tokens": 172732264.0, "reward": 0.0, "reward_std": 0.501916229724884, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.042010801088389005, "rewards/wordcountpos_reward/raw_geo/std": 0.09574643782867195, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1341.9375, "completions/mean_terminated_length": 1270.0909423828125, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.78875775155031, "frac_reward_zero_std": 0.0, "grad_norm": 2.821728565118872, "kl": 0.014190673828125, "learning_rate": 2.1710997072861476e-07, "loss": -0.0716, "num_tokens": 172787471.0, "reward": 0.0, "reward_std": 0.785433292388916, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3862506310816613, "rewards/wordcountpos_reward/raw_geo/std": 0.22735802010224573, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1330.9375, "completions/mean_terminated_length": 1254.0909423828125, "completions/min_length": 1078.0, "completions/min_terminated_length": 1078.0, "epoch": 0.7889577915583117, "frac_reward_zero_std": 0.0, "grad_norm": 2.9267926571110836, "kl": 0.0155487060546875, "learning_rate": 2.168986148035748e-07, "loss": -0.0205, "num_tokens": 172824790.0, "reward": 0.0, "reward_std": 0.5812445878982544, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.058287392441723575, "rewards/wordcountpos_reward/raw_geo/std": 0.06032275283674577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1139.75, "completions/mean_terminated_length": 1056.615478515625, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.7891578315663133, "frac_reward_zero_std": 0.0, "grad_norm": 3.1298006051970613, "kl": 0.0172882080078125, "learning_rate": 2.1668742130028277e-07, "loss": -0.026, "num_tokens": 172865906.0, "reward": 0.0, "reward_std": 0.6319822072982788, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12500185618271709, "rewards/wordcountpos_reward/raw_geo/std": 0.13797895597559257, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1206.875, "completions/mean_terminated_length": 1206.875, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.7893578715743149, "frac_reward_zero_std": 0.0, "grad_norm": 2.9625206978923218, "kl": 0.016998291015625, "learning_rate": 2.1647639032171772e-07, "loss": -0.0061, "num_tokens": 172913832.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6969888210296631, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1184531823510418, "rewards/wordcountpos_reward/raw_geo/std": 0.36090566563018783, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1017.3125, "completions/mean_terminated_length": 985.1333618164062, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.7895579115823165, "frac_reward_zero_std": 0.0, "grad_norm": 3.460414600719107, "kl": 0.02020263671875, "learning_rate": 2.1626552197077935e-07, "loss": -0.0611, "num_tokens": 172954725.0, "reward": 0.0, "reward_std": 1.0373830795288086, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.021582428528568058, "rewards/wordcountpos_reward/raw_geo/std": 0.14926829966136856, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 3947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1349.625, "completions/mean_terminated_length": 1232.6666259765625, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.789757951590318, "frac_reward_zero_std": 0.0, "grad_norm": 2.7403742108294358, "kl": 0.015045166015625, "learning_rate": 2.1605481635028778e-07, "loss": 0.0213, "num_tokens": 172998863.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7946590185165405, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08624736706309563, "rewards/wordcountpos_reward/raw_geo/std": 0.06883648395582764, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1376.625, "completions/mean_terminated_length": 1335.5, "completions/min_length": 1149.0, "completions/min_terminated_length": 1149.0, "epoch": 0.7899579915983197, "frac_reward_zero_std": 0.0, "grad_norm": 2.8128792670287353, "kl": 0.0157012939453125, "learning_rate": 2.1584427356298418e-07, "loss": -0.0041, "num_tokens": 173051385.0, "reward": 2.9802322387695312e-08, "reward_std": 0.825861930847168, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03519245795238799, "rewards/wordcountpos_reward/raw_geo/std": 0.22354905211245726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1136.8125, "completions/mean_terminated_length": 1112.60009765625, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.7901580316063213, "frac_reward_zero_std": 0.0, "grad_norm": 2.8052614927415918, "kl": 0.010772705078125, "learning_rate": 2.1563389371153017e-07, "loss": -0.005, "num_tokens": 173083774.0, "reward": 0.0, "reward_std": 0.6318582892417908, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07865021748730683, "rewards/wordcountpos_reward/raw_geo/std": 0.11540625427227276, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1187.5, "completions/mean_terminated_length": 1166.666748046875, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.7903580716143228, "frac_reward_zero_std": 0.0, "grad_norm": 3.1787727702900903, "kl": 0.0149383544921875, "learning_rate": 2.1542367689850804e-07, "loss": -0.0144, "num_tokens": 173136070.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8669468760490417, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.22287214744327408, "rewards/wordcountpos_reward/raw_geo/std": 0.0766774789967014, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1185.875, "completions/mean_terminated_length": 997.4000244140625, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.7905581116223245, "frac_reward_zero_std": 0.0, "grad_norm": 2.875131329575665, "kl": 0.0140228271484375, "learning_rate": 2.1521362322642024e-07, "loss": 0.0163, "num_tokens": 173181204.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6346575617790222, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09595772697399448, "rewards/wordcountpos_reward/raw_geo/std": 0.1737889292444753, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 1007.3125, "completions/mean_terminated_length": 1007.3125, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.790758151630326, "frac_reward_zero_std": 0.0, "grad_norm": 3.078455913948226, "kl": 0.01763916015625, "learning_rate": 2.1500373279768993e-07, "loss": 0.0018, "num_tokens": 173212033.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7348359823226929, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07533800505348995, "rewards/wordcountpos_reward/raw_geo/std": 0.058076861537321584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1232.125, "completions/mean_terminated_length": 1170.3077392578125, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.7909581916383277, "frac_reward_zero_std": 0.0, "grad_norm": 2.747992649333011, "kl": 0.01971435546875, "learning_rate": 2.1479400571466065e-07, "loss": 0.0061, "num_tokens": 173259835.0, "reward": 7.450580596923828e-09, "reward_std": 1.0540672540664673, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.045112115264822984, "rewards/wordcountpos_reward/raw_geo/std": 0.07344769897158775, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1216.6875, "completions/mean_terminated_length": 1122.25, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.7911582316463293, "frac_reward_zero_std": 0.0, "grad_norm": 3.116170659891666, "kl": 0.0134735107421875, "learning_rate": 2.1458444207959653e-07, "loss": -0.0396, "num_tokens": 173304870.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0398564338684082, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.050968385469906986, "rewards/wordcountpos_reward/raw_geo/std": 0.048702655614992596, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1065.0, "completions/max_terminated_length": 1065.0, "completions/mean_length": 774.625, "completions/mean_terminated_length": 774.625, "completions/min_length": 544.0, "completions/min_terminated_length": 544.0, "epoch": 0.7913582716543308, "frac_reward_zero_std": 0.0, "grad_norm": 3.8077517305155553, "kl": 0.0181884765625, "learning_rate": 2.1437504199468143e-07, "loss": 0.0107, "num_tokens": 173329520.0, "reward": -7.450580596923828e-09, "reward_std": 0.9426548480987549, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.004451579972389716, "rewards/wordcountpos_reward/raw_geo/std": 0.10730351730075724, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16459827639617797, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1306.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 1040.5625, "completions/mean_terminated_length": 1040.5625, "completions/min_length": 794.0, "completions/min_terminated_length": 794.0, "epoch": 0.7915583116623325, "frac_reward_zero_std": 0.0, "grad_norm": 3.7071118204317175, "kl": 0.0196533203125, "learning_rate": 2.1416580556201985e-07, "loss": -0.0233, "num_tokens": 173371465.0, "reward": 0.0, "reward_std": 1.011763334274292, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07734474485684413, "rewards/wordcountpos_reward/raw_geo/std": 0.1185829094812034, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1408.375, "completions/mean_terminated_length": 1316.75, "completions/min_length": 1099.0, "completions/min_terminated_length": 1099.0, "epoch": 0.791758351670334, "frac_reward_zero_std": 0.0, "grad_norm": 3.1784660324940917, "kl": 0.0225830078125, "learning_rate": 2.1395673288363647e-07, "loss": -0.0321, "num_tokens": 173427439.0, "reward": 0.0, "reward_std": 0.7191697359085083, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11946732516907875, "rewards/wordcountpos_reward/raw_geo/std": 0.20557053899393596, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1003.25, "completions/mean_terminated_length": 970.1333618164062, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.7919583916783357, "frac_reward_zero_std": 0.0, "grad_norm": 3.2658483653326593, "kl": 0.0161895751953125, "learning_rate": 2.1374782406147623e-07, "loss": -0.0154, "num_tokens": 173459307.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9616564512252808, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1683326751648024, "rewards/wordcountpos_reward/raw_geo/std": 0.1159314397194348, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1492.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1163.6875, "completions/mean_terminated_length": 1163.6875, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.7921584316863373, "frac_reward_zero_std": 0.0, "grad_norm": 2.800282591673636, "kl": 0.010498046875, "learning_rate": 2.135390791974036e-07, "loss": 0.0154, "num_tokens": 173499838.0, "reward": -2.9802322387695312e-08, "reward_std": 0.43570375442504883, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04777897112861029, "rewards/wordcountpos_reward/raw_geo/std": 0.14661681472549917, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639732, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1162.4375, "completions/mean_terminated_length": 1162.4375, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.7923584716943388, "frac_reward_zero_std": 0.0, "grad_norm": 3.142065889694054, "kl": 0.02301025390625, "learning_rate": 2.1333049839320405e-07, "loss": -0.0326, "num_tokens": 173546301.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7917525768280029, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07919833969095351, "rewards/wordcountpos_reward/raw_geo/std": 0.07566059086655576, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563383, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1337.125, "completions/mean_terminated_length": 1299.5384521484375, "completions/min_length": 1202.0, "completions/min_terminated_length": 1202.0, "epoch": 0.7925585117023405, "frac_reward_zero_std": 0.0, "grad_norm": 3.17807728871495, "kl": 0.0162811279296875, "learning_rate": 2.1312208175058212e-07, "loss": 0.0036, "num_tokens": 173598919.0, "reward": -2.9802322387695312e-08, "reward_std": 0.44021815061569214, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.022222462728894724, "rewards/wordcountpos_reward/raw_geo/std": 0.1351584966530163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114841, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1190.5, "completions/mean_terminated_length": 1190.5, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.792758551710342, "frac_reward_zero_std": 0.0, "grad_norm": 2.0426119554435593, "kl": 0.012176513671875, "learning_rate": 2.12913829371163e-07, "loss": -0.0474, "num_tokens": 173643271.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6465641856193542, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06434488823921415, "rewards/wordcountpos_reward/raw_geo/std": 0.163613370387112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12524050936172842, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1118.6875, "completions/mean_terminated_length": 1093.2667236328125, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.7929585917183437, "frac_reward_zero_std": 0.0, "grad_norm": 3.6460213015522878, "kl": 0.01983642578125, "learning_rate": 2.127057413564911e-07, "loss": -0.0236, "num_tokens": 173693754.0, "reward": 0.0, "reward_std": 1.0262689590454102, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.24953140577384084, "rewards/wordcountpos_reward/raw_geo/std": 0.11391045396812308, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1108.625, "completions/mean_terminated_length": 1082.533447265625, "completions/min_length": 682.0, "completions/min_terminated_length": 682.0, "epoch": 0.7931586317263453, "frac_reward_zero_std": 0.0, "grad_norm": 3.706926875271429, "kl": 0.01873779296875, "learning_rate": 2.1249781780803155e-07, "loss": 0.0344, "num_tokens": 173741156.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8821749687194824, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09227141033603731, "rewards/wordcountpos_reward/raw_geo/std": 0.10741820994254896, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1133.25, "completions/mean_terminated_length": 1133.25, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.7933586717343468, "frac_reward_zero_std": 0.0, "grad_norm": 3.0540118857202967, "kl": 0.0153045654296875, "learning_rate": 2.1229005882716854e-07, "loss": 0.0209, "num_tokens": 173783208.0, "reward": 0.0, "reward_std": 0.9629648923873901, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05221510882682223, "rewards/wordcountpos_reward/raw_geo/std": 0.07503034593729381, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298359, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1285.4375, "completions/mean_terminated_length": 1235.923095703125, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.7935587117423485, "frac_reward_zero_std": 0.0, "grad_norm": 2.1743289175186504, "kl": 0.0088043212890625, "learning_rate": 2.1208246451520633e-07, "loss": -0.0376, "num_tokens": 173818783.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9037868976593018, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009627973165444182, "rewards/wordcountpos_reward/raw_geo/std": 0.037255416784565086, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1124.1875, "completions/mean_terminated_length": 1099.1334228515625, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.79375875175035, "frac_reward_zero_std": 0.0, "grad_norm": 2.8272687201987585, "kl": 0.01300811767578125, "learning_rate": 2.118750349733689e-07, "loss": -0.0143, "num_tokens": 173855170.0, "reward": 0.0, "reward_std": 0.7809925079345703, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.050051442902075596, "rewards/wordcountpos_reward/raw_geo/std": 0.09949566209367446, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1183.0, "completions/mean_terminated_length": 1183.0, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.7939587917583517, "frac_reward_zero_std": 0.0, "grad_norm": 3.425904218063067, "kl": 0.0162506103515625, "learning_rate": 2.1166777030279985e-07, "loss": -0.0192, "num_tokens": 173901050.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9042596817016602, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14648718983322082, "rewards/wordcountpos_reward/raw_geo/std": 0.08079881365985604, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1171.0, "completions/max_terminated_length": 1171.0, "completions/mean_length": 963.8125, "completions/mean_terminated_length": 963.8125, "completions/min_length": 706.0, "completions/min_terminated_length": 706.0, "epoch": 0.7941588317663533, "frac_reward_zero_std": 0.0, "grad_norm": 3.8052204288250016, "kl": 0.0283660888671875, "learning_rate": 2.1146067060456229e-07, "loss": 0.0387, "num_tokens": 173930119.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0628840923309326, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.035299320692923415, "rewards/wordcountpos_reward/raw_geo/std": 0.13862018600926154, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1238.8125, "completions/mean_terminated_length": 1201.5, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.7943588717743548, "frac_reward_zero_std": 0.0, "grad_norm": 3.0077678808355808, "kl": 0.01434326171875, "learning_rate": 2.1125373597963904e-07, "loss": -0.0183, "num_tokens": 173977740.0, "reward": 0.0, "reward_std": 0.5154581069946289, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1342610268929902, "rewards/wordcountpos_reward/raw_geo/std": 0.2889443092072768, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1181.1875, "completions/mean_terminated_length": 1159.933349609375, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.7945589117823565, "frac_reward_zero_std": 0.0, "grad_norm": 3.392198535735442, "kl": 0.016510009765625, "learning_rate": 2.110469665289324e-07, "loss": -0.0048, "num_tokens": 174025119.0, "reward": 0.0, "reward_std": 1.0660018920898438, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15781606305986723, "rewards/wordcountpos_reward/raw_geo/std": 0.1346791666764634, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1144.5625, "completions/mean_terminated_length": 1120.86669921875, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.794758951790358, "frac_reward_zero_std": 0.0, "grad_norm": 3.7832343772140815, "kl": 0.02264404296875, "learning_rate": 2.1084036235326392e-07, "loss": 0.013, "num_tokens": 174068952.0, "reward": 0.0, "reward_std": 0.9670567512512207, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.004237338132761815, "rewards/wordcountpos_reward/raw_geo/std": 0.06671020366072014, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 1006.9375, "completions/mean_terminated_length": 1006.9375, "completions/min_length": 811.0, "completions/min_terminated_length": 811.0, "epoch": 0.7949589917983597, "frac_reward_zero_std": 0.0, "grad_norm": 3.150341911185014, "kl": 0.014617919921875, "learning_rate": 2.106339235533749e-07, "loss": 0.0043, "num_tokens": 174104911.0, "reward": -4.470348358154297e-08, "reward_std": 1.0141900777816772, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05160979978732152, "rewards/wordcountpos_reward/raw_geo/std": 0.08656746260509941, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 1049.3125, "completions/mean_terminated_length": 1049.3125, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.7951590318063613, "frac_reward_zero_std": 0.0, "grad_norm": 2.978010448961619, "kl": 0.0152130126953125, "learning_rate": 2.104276502299257e-07, "loss": -0.0084, "num_tokens": 174146404.0, "reward": 0.0, "reward_std": 0.6592285633087158, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10698193459744025, "rewards/wordcountpos_reward/raw_geo/std": 0.14288659633375073, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1207.875, "completions/mean_terminated_length": 1207.875, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.7953590718143628, "frac_reward_zero_std": 0.0, "grad_norm": 2.994432736782927, "kl": 0.0168609619140625, "learning_rate": 2.1022154248349638e-07, "loss": 0.0016, "num_tokens": 174185514.0, "reward": -1.4901161193847656e-08, "reward_std": 1.018214464187622, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009561204331426607, "rewards/wordcountpos_reward/raw_geo/std": 0.10040181626249922, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043477, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1248.5625, "completions/mean_terminated_length": 1134.272705078125, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.7955591118223645, "frac_reward_zero_std": 0.0, "grad_norm": 3.2730725028957726, "kl": 0.019287109375, "learning_rate": 2.1001560041458573e-07, "loss": -0.0034, "num_tokens": 174231043.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8893605470657349, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2183230527058066, "rewards/wordcountpos_reward/raw_geo/std": 0.12844963898953057, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1087.0, "completions/max_terminated_length": 1087.0, "completions/mean_length": 942.9375, "completions/mean_terminated_length": 942.9375, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.795759151830366, "frac_reward_zero_std": 0.0, "grad_norm": 3.2152550698019176, "kl": 0.013885498046875, "learning_rate": 2.0980982412361214e-07, "loss": -0.0047, "num_tokens": 174275554.0, "reward": 0.0, "reward_std": 0.903415322303772, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17744980301537133, "rewards/wordcountpos_reward/raw_geo/std": 0.18355110447751322, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13270686158262923, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1345.8125, "completions/mean_terminated_length": 1275.727294921875, "completions/min_length": 1139.0, "completions/min_terminated_length": 1139.0, "epoch": 0.7959591918383677, "frac_reward_zero_std": 0.0, "grad_norm": 2.9308581754110006, "kl": 0.0167388916015625, "learning_rate": 2.096042137109131e-07, "loss": 0.0081, "num_tokens": 174327391.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9634082317352295, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2370091651981498, "rewards/wordcountpos_reward/raw_geo/std": 0.07169637697878288, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1200.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 918.0625, "completions/mean_terminated_length": 918.0625, "completions/min_length": 313.0, "completions/min_terminated_length": 313.0, "epoch": 0.7961592318463693, "frac_reward_zero_std": 0.0, "grad_norm": 2.987068152329857, "kl": 0.013275146484375, "learning_rate": 2.0939876927674527e-07, "loss": -0.0242, "num_tokens": 174371464.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8090717792510986, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08740980975758585, "rewards/wordcountpos_reward/raw_geo/std": 0.17922025645595985, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505421, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1087.125, "completions/mean_terminated_length": 1087.125, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.7963592718543708, "frac_reward_zero_std": 0.0, "grad_norm": 2.462534712413671, "kl": 0.017364501953125, "learning_rate": 2.0919349092128403e-07, "loss": -0.0172, "num_tokens": 174412122.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9435210227966309, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05768420396236079, "rewards/wordcountpos_reward/raw_geo/std": 0.09101725862531874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1191.6875, "completions/mean_terminated_length": 1171.1334228515625, "completions/min_length": 947.0, "completions/min_terminated_length": 947.0, "epoch": 0.7965593118623725, "frac_reward_zero_std": 0.0, "grad_norm": 2.9682912652809605, "kl": 0.0154571533203125, "learning_rate": 2.0898837874462422e-07, "loss": -0.016, "num_tokens": 174455005.0, "reward": 0.0, "reward_std": 0.9185692071914673, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.033436418315995003, "rewards/wordcountpos_reward/raw_geo/std": 0.11735109630233682, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1213.8125, "completions/mean_terminated_length": 1147.769287109375, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.796759351870374, "frac_reward_zero_std": 0.0, "grad_norm": 1.627231306037126, "kl": 0.00799560546875, "learning_rate": 2.087834328467794e-07, "loss": 0.0084, "num_tokens": 174502866.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5739948153495789, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019864519318520776, "rewards/wordcountpos_reward/raw_geo/std": 0.19628200772358811, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1267.5, "completions/mean_terminated_length": 1252.0001220703125, "completions/min_length": 1091.0, "completions/min_terminated_length": 1091.0, "epoch": 0.7969593918783757, "frac_reward_zero_std": 0.0, "grad_norm": 3.0395350640733803, "kl": 0.014801025390625, "learning_rate": 2.085786533276823e-07, "loss": -0.0333, "num_tokens": 174553058.0, "reward": -2.9802322387695312e-08, "reward_std": 0.456691175699234, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21347343170589989, "rewards/wordcountpos_reward/raw_geo/std": 0.13396488096517667, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1184.9375, "completions/mean_terminated_length": 1163.933349609375, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.7971594318863773, "frac_reward_zero_std": 0.0, "grad_norm": 3.22382785802018, "kl": 0.0170745849609375, "learning_rate": 2.0837404028718408e-07, "loss": -0.0283, "num_tokens": 174599641.0, "reward": 0.0, "reward_std": 0.9552154541015625, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3371067484286272, "rewards/wordcountpos_reward/raw_geo/std": 0.2603472747624892, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1348.1875, "completions/mean_terminated_length": 1257.0999755859375, "completions/min_length": 1040.0, "completions/min_terminated_length": 1040.0, "epoch": 0.7973594718943788, "frac_reward_zero_std": 0.0, "grad_norm": 3.115087646386263, "kl": 0.01953125, "learning_rate": 2.081695938250551e-07, "loss": -0.0034, "num_tokens": 174649212.0, "reward": 0.0, "reward_std": 0.7393419742584229, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0341361000507727, "rewards/wordcountpos_reward/raw_geo/std": 0.06488527983145108, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1130.125, "completions/mean_terminated_length": 1130.125, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.7975595119023805, "frac_reward_zero_std": 0.0, "grad_norm": 2.655772854440442, "kl": 0.011810302734375, "learning_rate": 2.0796531404098445e-07, "loss": -0.0022, "num_tokens": 174698942.0, "reward": -5.960464477539063e-08, "reward_std": 0.8762080669403076, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20470998902110263, "rewards/wordcountpos_reward/raw_geo/std": 0.13323632288766812, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 1026.25, "completions/mean_terminated_length": 1026.25, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.7977595519103821, "frac_reward_zero_std": 0.0, "grad_norm": 3.2746238931767016, "kl": 0.0140838623046875, "learning_rate": 2.0776120103457987e-07, "loss": -0.0439, "num_tokens": 174737562.0, "reward": 5.960464477539063e-08, "reward_std": 0.26248031854629517, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07256749222068795, "rewards/wordcountpos_reward/raw_geo/std": 0.08547064204638737, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.7583333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.18836528735805194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1157.3125, "completions/mean_terminated_length": 1078.2308349609375, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.7979595919183837, "frac_reward_zero_std": 0.0, "grad_norm": 2.891883305608127, "kl": 0.015411376953125, "learning_rate": 2.0755725490536753e-07, "loss": -0.0089, "num_tokens": 174784807.0, "reward": -2.2351741790771484e-08, "reward_std": 1.031052827835083, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0051088251860538295, "rewards/wordcountpos_reward/raw_geo/std": 0.05847908248744194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1161.3125, "completions/mean_terminated_length": 1161.3125, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.7981596319263853, "frac_reward_zero_std": 0.0, "grad_norm": 3.4379704794667725, "kl": 0.0157623291015625, "learning_rate": 2.073534757527929e-07, "loss": -0.0092, "num_tokens": 174832340.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9816824197769165, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08425261268946921, "rewards/wordcountpos_reward/raw_geo/std": 0.331181472457985, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1234.0, "completions/max_terminated_length": 1234.0, "completions/mean_length": 1070.375, "completions/mean_terminated_length": 1070.375, "completions/min_length": 720.0, "completions/min_terminated_length": 720.0, "epoch": 0.7983596719343868, "frac_reward_zero_std": 0.0, "grad_norm": 2.804770000264064, "kl": 0.0127716064453125, "learning_rate": 2.071498636762193e-07, "loss": -0.0288, "num_tokens": 174875138.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8988010883331299, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11022595437651832, "rewards/wordcountpos_reward/raw_geo/std": 0.06672425437157294, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327551, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1088.5625, "completions/mean_terminated_length": 1061.1334228515625, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.7985597119423885, "frac_reward_zero_std": 0.0, "grad_norm": 3.6407825380069063, "kl": 0.02935791015625, "learning_rate": 2.0694641877492917e-07, "loss": 0.0026, "num_tokens": 174926827.0, "reward": 0.0, "reward_std": 1.0293048620224, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03692853256782051, "rewards/wordcountpos_reward/raw_geo/std": 0.1395493331267208, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1434.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1061.875, "completions/mean_terminated_length": 1061.875, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.7987597519503901, "frac_reward_zero_std": 0.0, "grad_norm": 3.038924512595605, "kl": 0.0113677978515625, "learning_rate": 2.067431411481228e-07, "loss": -0.0072, "num_tokens": 174961737.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0329506397247314, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14340313099468785, "rewards/wordcountpos_reward/raw_geo/std": 0.11126972812107791, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 947.5, "completions/mean_terminated_length": 910.6666870117188, "completions/min_length": 578.0, "completions/min_terminated_length": 578.0, "epoch": 0.7989597919583917, "frac_reward_zero_std": 0.0, "grad_norm": 3.5013877609550925, "kl": 0.015625, "learning_rate": 2.065400308949197e-07, "loss": -0.0319, "num_tokens": 174991673.0, "reward": 0.0, "reward_std": 0.7282782196998596, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.026823846229361455, "rewards/wordcountpos_reward/raw_geo/std": 0.03325292467548696, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1202.125, "completions/mean_terminated_length": 1102.8333740234375, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.7991598319663933, "frac_reward_zero_std": 0.0, "grad_norm": 3.17550400630559, "kl": 0.0177001953125, "learning_rate": 2.0633708811435708e-07, "loss": 0.0097, "num_tokens": 175041931.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6712620258331299, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.062068847094592164, "rewards/wordcountpos_reward/raw_geo/std": 0.10692764103442572, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.12171612389003693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1189.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 999.75, "completions/mean_terminated_length": 999.75, "completions/min_length": 685.0, "completions/min_terminated_length": 685.0, "epoch": 0.7993598719743948, "frac_reward_zero_std": 0.0, "grad_norm": 2.8181466286743255, "kl": 0.01708984375, "learning_rate": 2.0613431290539074e-07, "loss": -0.0043, "num_tokens": 175092055.0, "reward": 7.450580596923828e-09, "reward_std": 1.0577911138534546, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.033735480871265176, "rewards/wordcountpos_reward/raw_geo/std": 0.08064001446045366, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1346.0, "completions/mean_length": 1124.3125, "completions/mean_terminated_length": 1099.2667236328125, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.7995599119823965, "frac_reward_zero_std": 0.0, "grad_norm": 3.0491412195869603, "kl": 0.0160980224609375, "learning_rate": 2.0593170536689498e-07, "loss": 0.0498, "num_tokens": 175132812.0, "reward": -2.9802322387695312e-08, "reward_std": 1.045474886894226, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1491805420950144, "rewards/wordcountpos_reward/raw_geo/std": 0.06618455067895439, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116195, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1142.0, "completions/mean_terminated_length": 1059.3846435546875, "completions/min_length": 664.0, "completions/min_terminated_length": 664.0, "epoch": 0.7997599519903981, "frac_reward_zero_std": 0.0, "grad_norm": 3.3047277141928606, "kl": 0.020538330078125, "learning_rate": 2.0572926559766217e-07, "loss": -0.0551, "num_tokens": 175174076.0, "reward": 0.0, "reward_std": 0.8240252733230591, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14639147796014537, "rewards/wordcountpos_reward/raw_geo/std": 0.11726507794124641, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454343, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 3998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1214.6875, "completions/mean_terminated_length": 1214.6875, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.7999599919983997, "frac_reward_zero_std": 0.0, "grad_norm": 3.3430959067864596, "kl": 0.018951416015625, "learning_rate": 2.0552699369640263e-07, "loss": -0.0174, "num_tokens": 175217951.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9728735685348511, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016074387109848886, "rewards/wordcountpos_reward/raw_geo/std": 0.10482794362536466, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668904, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 3999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1242.5, "completions/mean_terminated_length": 1183.0770263671875, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.8001600320064013, "frac_reward_zero_std": 0.0, "grad_norm": 2.7504961717367395, "kl": 0.01312255859375, "learning_rate": 2.0532488976174516e-07, "loss": 0.0097, "num_tokens": 175269655.0, "reward": 0.0, "reward_std": 0.4714478850364685, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05524503495861803, "rewards/wordcountpos_reward/raw_geo/std": 0.2097202717684872, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1088.875, "completions/mean_terminated_length": 1088.875, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.8003600720144028, "frac_reward_zero_std": 0.0, "grad_norm": 3.179972337885355, "kl": 0.0194091796875, "learning_rate": 2.0512295389223657e-07, "loss": -0.0716, "num_tokens": 175312765.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7141318321228027, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05321348631866966, "rewards/wordcountpos_reward/raw_geo/std": 0.03883507461647332, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 947.75, "completions/mean_terminated_length": 947.75, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.8005601120224045, "frac_reward_zero_std": 0.0, "grad_norm": 3.5204993087524334, "kl": 0.01959228515625, "learning_rate": 2.049211861863419e-07, "loss": -0.03, "num_tokens": 175351089.0, "reward": 0.0, "reward_std": 0.6112874150276184, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015337665467205823, "rewards/wordcountpos_reward/raw_geo/std": 0.08316676967045375, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 1067.3125, "completions/mean_terminated_length": 1067.3125, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.8007601520304061, "frac_reward_zero_std": 0.0, "grad_norm": 3.1496379152443432, "kl": 0.012542724609375, "learning_rate": 2.0471958674244365e-07, "loss": -0.0327, "num_tokens": 175389046.0, "reward": 0.0, "reward_std": 0.7182726860046387, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1032096979507686, "rewards/wordcountpos_reward/raw_geo/std": 0.1471920375775816, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1258.625, "completions/mean_terminated_length": 1148.9091796875, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.8009601920384077, "frac_reward_zero_std": 0.0, "grad_norm": 3.327223497036335, "kl": 0.0205078125, "learning_rate": 2.0451815565884278e-07, "loss": -0.0022, "num_tokens": 175433888.0, "reward": 5.960464477539063e-08, "reward_std": 0.9837850332260132, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12474121103743095, "rewards/wordcountpos_reward/raw_geo/std": 0.11036974514366044, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767716, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1339.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1097.0, "completions/mean_terminated_length": 1097.0, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.8011602320464093, "frac_reward_zero_std": 0.0, "grad_norm": 2.859696955332242, "kl": 0.016876220703125, "learning_rate": 2.0431689303375805e-07, "loss": -0.0481, "num_tokens": 175484232.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0139211416244507, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04887351779774965, "rewards/wordcountpos_reward/raw_geo/std": 0.047855596230770805, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1216.0, "completions/max_terminated_length": 1216.0, "completions/mean_length": 929.6875, "completions/mean_terminated_length": 929.6875, "completions/min_length": 688.0, "completions/min_terminated_length": 688.0, "epoch": 0.8013602720544108, "frac_reward_zero_std": 0.0, "grad_norm": 3.9613605410508517, "kl": 0.02032470703125, "learning_rate": 2.0411579896532608e-07, "loss": -0.0375, "num_tokens": 175532355.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0226200819015503, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.006196153096697916, "rewards/wordcountpos_reward/raw_geo/std": 0.1867594928999747, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504183, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1292.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1080.5, "completions/mean_terminated_length": 1080.5, "completions/min_length": 796.0, "completions/min_terminated_length": 796.0, "epoch": 0.8015603120624125, "frac_reward_zero_std": 0.0, "grad_norm": 3.252459731692157, "kl": 0.01837158203125, "learning_rate": 2.0391487355160106e-07, "loss": -0.0211, "num_tokens": 175579307.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8900671601295471, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07554969912987022, "rewards/wordcountpos_reward/raw_geo/std": 0.062017099782231376, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282607, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1144.25, "completions/mean_terminated_length": 1062.1539306640625, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.8017603520704141, "frac_reward_zero_std": 0.0, "grad_norm": 2.8762678091400264, "kl": 0.013641357421875, "learning_rate": 2.0371411689055524e-07, "loss": -0.0566, "num_tokens": 175617831.0, "reward": -2.9802322387695312e-08, "reward_std": 0.3160289525985718, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.022823613548287568, "rewards/wordcountpos_reward/raw_geo/std": 0.14212336201501852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1304.0, "completions/mean_terminated_length": 1186.4000244140625, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.8019603920784156, "frac_reward_zero_std": 0.0, "grad_norm": 2.521221677077081, "kl": 0.013641357421875, "learning_rate": 2.035135290800784e-07, "loss": -0.0163, "num_tokens": 175671911.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0612430572509766, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08097792357612149, "rewards/wordcountpos_reward/raw_geo/std": 0.07480087861182731, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1223.875, "completions/mean_terminated_length": 1223.875, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.8021604320864173, "frac_reward_zero_std": 0.0, "grad_norm": 3.431459944449437, "kl": 0.020263671875, "learning_rate": 2.0331311021797836e-07, "loss": -0.0149, "num_tokens": 175721741.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7948671579360962, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0014974706438526724, "rewards/wordcountpos_reward/raw_geo/std": 0.08289604780579515, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.17554149029450222, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4010 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1063.875, "completions/mean_terminated_length": 1063.875, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.8023604720944189, "frac_reward_zero_std": 0.0, "grad_norm": 2.6889726179658213, "kl": 0.018157958984375, "learning_rate": 2.0311286040197978e-07, "loss": -0.0038, "num_tokens": 175762459.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9939166307449341, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1774571602414264, "rewards/wordcountpos_reward/raw_geo/std": 0.07206965246516242, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0596284793999944, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1210.0, "completions/mean_terminated_length": 1168.571533203125, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.8025605121024205, "frac_reward_zero_std": 0.0, "grad_norm": 3.310816730335727, "kl": 0.018951416015625, "learning_rate": 2.0291277972972588e-07, "loss": -0.0212, "num_tokens": 175799227.0, "reward": 0.0, "reward_std": 0.7569400668144226, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16858298511122546, "rewards/wordcountpos_reward/raw_geo/std": 0.15055721082766785, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 1068.9375, "completions/mean_terminated_length": 1068.9375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.8027605521104221, "frac_reward_zero_std": 0.0, "grad_norm": 2.465836774751342, "kl": 0.0110626220703125, "learning_rate": 2.0271286829877655e-07, "loss": -0.0188, "num_tokens": 175842146.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0647926330566406, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03017106939380941, "rewards/wordcountpos_reward/raw_geo/std": 0.051697518495682326, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1116.0625, "completions/mean_terminated_length": 1116.0625, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.8029605921184236, "frac_reward_zero_std": 0.0, "grad_norm": 3.244981135700581, "kl": 0.0164947509765625, "learning_rate": 2.0251312620660975e-07, "loss": 0.0404, "num_tokens": 175892171.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9426315426826477, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10864272076499049, "rewards/wordcountpos_reward/raw_geo/std": 0.13134107917527954, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252809, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1312.9375, "completions/mean_terminated_length": 1250.5833740234375, "completions/min_length": 948.0, "completions/min_terminated_length": 948.0, "epoch": 0.8031606321264253, "frac_reward_zero_std": 0.0, "grad_norm": 2.968235578403958, "kl": 0.02044677734375, "learning_rate": 2.023135535506203e-07, "loss": 0.0095, "num_tokens": 175939450.0, "reward": -3.725290298461914e-09, "reward_std": 1.062009334564209, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.014206213458082746, "rewards/wordcountpos_reward/raw_geo/std": 0.04571235539876159, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 999.0, "completions/mean_terminated_length": 999.0, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.8033606721344269, "frac_reward_zero_std": 0.0, "grad_norm": 3.228456688731486, "kl": 0.0154266357421875, "learning_rate": 2.0211415042812115e-07, "loss": -0.0065, "num_tokens": 175990026.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7917953729629517, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12207798395464955, "rewards/wordcountpos_reward/raw_geo/std": 0.16393767967901618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1401.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1189.25, "completions/mean_terminated_length": 1189.25, "completions/min_length": 1063.0, "completions/min_terminated_length": 1063.0, "epoch": 0.8035607121424285, "frac_reward_zero_std": 0.0, "grad_norm": 2.7909015055901856, "kl": 0.013885498046875, "learning_rate": 2.0191491693634188e-07, "loss": 0.0041, "num_tokens": 176034582.0, "reward": 0.0, "reward_std": 0.5248676538467407, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08672674761584975, "rewards/wordcountpos_reward/raw_geo/std": 0.05676379665040995, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1108.1875, "completions/mean_terminated_length": 1082.0667724609375, "completions/min_length": 716.0, "completions/min_terminated_length": 716.0, "epoch": 0.8037607521504301, "frac_reward_zero_std": 0.0, "grad_norm": 2.847587216956848, "kl": 0.01544189453125, "learning_rate": 2.0171585317242973e-07, "loss": -0.0444, "num_tokens": 176067513.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0274896621704102, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0966019216872104, "rewards/wordcountpos_reward/raw_geo/std": 0.03761150846287654, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1342.8125, "completions/mean_terminated_length": 1140.71435546875, "completions/min_length": 1032.0, "completions/min_terminated_length": 1032.0, "epoch": 0.8039607921584316, "frac_reward_zero_std": 0.0, "grad_norm": 2.2102406911726833, "kl": 0.0104217529296875, "learning_rate": 2.015169592334491e-07, "loss": 0.0432, "num_tokens": 176117606.0, "reward": 0.0, "reward_std": 0.7376229166984558, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011799863021349196, "rewards/wordcountpos_reward/raw_geo/std": 0.21028512266825827, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1190.6875, "completions/mean_terminated_length": 1190.6875, "completions/min_length": 1032.0, "completions/min_terminated_length": 1032.0, "epoch": 0.8041608321664333, "frac_reward_zero_std": 0.0, "grad_norm": 2.7351598826321806, "kl": 0.012481689453125, "learning_rate": 2.013182352163817e-07, "loss": 0.006, "num_tokens": 176158073.0, "reward": 0.0, "reward_std": 0.8917276263237, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04240755987499811, "rewards/wordcountpos_reward/raw_geo/std": 0.12307558483285641, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1280.0625, "completions/mean_terminated_length": 1229.3077392578125, "completions/min_length": 1022.0, "completions/min_terminated_length": 1022.0, "epoch": 0.8043608721744349, "frac_reward_zero_std": 0.0, "grad_norm": 3.1455103010218597, "kl": 0.017333984375, "learning_rate": 2.01119681218126e-07, "loss": -0.0138, "num_tokens": 176211378.0, "reward": 0.0, "reward_std": 0.6905918121337891, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.005601239554163095, "rewards/wordcountpos_reward/raw_geo/std": 0.16594853169577412, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 981.125, "completions/mean_terminated_length": 981.125, "completions/min_length": 703.0, "completions/min_terminated_length": 703.0, "epoch": 0.8045609121824365, "frac_reward_zero_std": 0.0, "grad_norm": 3.8536009871053905, "kl": 0.018157958984375, "learning_rate": 2.0092129733549794e-07, "loss": -0.008, "num_tokens": 176257140.0, "reward": -1.862645149230957e-08, "reward_std": 1.0554841756820679, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10934941740728481, "rewards/wordcountpos_reward/raw_geo/std": 0.07185291864917792, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476839, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1028.625, "completions/mean_terminated_length": 1028.625, "completions/min_length": 784.0, "completions/min_terminated_length": 784.0, "epoch": 0.8047609521904381, "frac_reward_zero_std": 0.0, "grad_norm": 3.272833042334278, "kl": 0.02130126953125, "learning_rate": 2.0072308366523053e-07, "loss": -0.0567, "num_tokens": 176296606.0, "reward": 5.960464477539063e-08, "reward_std": 0.7275924682617188, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03419250892385064, "rewards/wordcountpos_reward/raw_geo/std": 0.1525614108686269, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 986.5625, "completions/mean_terminated_length": 986.5625, "completions/min_length": 748.0, "completions/min_terminated_length": 748.0, "epoch": 0.8049609921984396, "frac_reward_zero_std": 0.0, "grad_norm": 3.3690788716131075, "kl": 0.014312744140625, "learning_rate": 2.005250403039736e-07, "loss": -0.0128, "num_tokens": 176331055.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6529707908630371, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.034230669154915044, "rewards/wordcountpos_reward/raw_geo/std": 0.12477688183602932, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970784, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1223.75, "completions/mean_terminated_length": 1131.666748046875, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.8051610322064413, "frac_reward_zero_std": 0.0, "grad_norm": 3.1591611059501483, "kl": 0.01806640625, "learning_rate": 2.0032716734829385e-07, "loss": -0.0049, "num_tokens": 176370971.0, "reward": 5.960464477539063e-08, "reward_std": 0.6792475581169128, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07742262833748087, "rewards/wordcountpos_reward/raw_geo/std": 0.10383424569901116, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10741060020797315, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 860.5, "completions/mean_terminated_length": 860.5, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.8053610722144429, "frac_reward_zero_std": 0.0, "grad_norm": 4.405015007541899, "kl": 0.019561767578125, "learning_rate": 2.0012946489467513e-07, "loss": -0.0795, "num_tokens": 176409467.0, "reward": 1.4901161193847656e-08, "reward_std": 0.966825008392334, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06853677660437614, "rewards/wordcountpos_reward/raw_geo/std": 0.08072307344612736, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.15563490039905004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1003.0625, "completions/mean_terminated_length": 1003.0625, "completions/min_length": 703.0, "completions/min_terminated_length": 703.0, "epoch": 0.8055611122224445, "frac_reward_zero_std": 0.0, "grad_norm": 3.548224043305359, "kl": 0.0142669677734375, "learning_rate": 1.9993193303951818e-07, "loss": 0.0027, "num_tokens": 176457004.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6362415552139282, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13763860942696987, "rewards/wordcountpos_reward/raw_geo/std": 0.0885421183202455, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4027 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1222.875, "completions/mean_terminated_length": 1222.875, "completions/min_length": 1032.0, "completions/min_terminated_length": 1032.0, "epoch": 0.8057611522304461, "frac_reward_zero_std": 0.0, "grad_norm": 3.0803709616448263, "kl": 0.0137481689453125, "learning_rate": 1.997345718791402e-07, "loss": 0.0342, "num_tokens": 176502850.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9412546753883362, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09761895817157995, "rewards/wordcountpos_reward/raw_geo/std": 0.12092046279174777, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1265.125, "completions/mean_terminated_length": 1158.3636474609375, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.8059611922384476, "frac_reward_zero_std": 0.0, "grad_norm": 2.542793819050974, "kl": 0.0144500732421875, "learning_rate": 1.9953738150977548e-07, "loss": -0.0217, "num_tokens": 176548964.0, "reward": 0.0, "reward_std": 0.4614558219909668, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07415442440629026, "rewards/wordcountpos_reward/raw_geo/std": 0.3008821838630261, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.16843506277010845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1109.0, "completions/max_terminated_length": 1109.0, "completions/mean_length": 934.625, "completions/mean_terminated_length": 934.625, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.8061612322464493, "frac_reward_zero_std": 0.0, "grad_norm": 3.406383045910013, "kl": 0.022216796875, "learning_rate": 1.9934036202757492e-07, "loss": -0.0352, "num_tokens": 176598246.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8780084848403931, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.027157838814781433, "rewards/wordcountpos_reward/raw_geo/std": 0.05299498976121311, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4030 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1149.625, "completions/mean_terminated_length": 1126.2667236328125, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.8063612722544509, "frac_reward_zero_std": 0.0, "grad_norm": 2.3822989260262384, "kl": 0.0127105712890625, "learning_rate": 1.9914351352860632e-07, "loss": -0.0063, "num_tokens": 176627704.0, "reward": 0.0, "reward_std": 0.8570871353149414, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0068254540191616765, "rewards/wordcountpos_reward/raw_geo/std": 0.054150360018201775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1245.0, "completions/mean_terminated_length": 1228.0001220703125, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.8065613122624525, "frac_reward_zero_std": 0.0, "grad_norm": 3.1486173217193154, "kl": 0.0175323486328125, "learning_rate": 1.989468361088536e-07, "loss": -0.0132, "num_tokens": 176678520.0, "reward": 0.0, "reward_std": 0.8900086283683777, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.204345119646467, "rewards/wordcountpos_reward/raw_geo/std": 0.10611205435791622, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 1029.375, "completions/mean_terminated_length": 1029.375, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.8067613522704541, "frac_reward_zero_std": 0.0, "grad_norm": 3.2003682701923015, "kl": 0.02001953125, "learning_rate": 1.9875032986421764e-07, "loss": -0.0258, "num_tokens": 176731134.0, "reward": 0.0, "reward_std": 0.8016079068183899, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05482634124978669, "rewards/wordcountpos_reward/raw_geo/std": 0.09540247976520248, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1083.0, "completions/mean_terminated_length": 1083.0, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.8069613922784556, "frac_reward_zero_std": 0.0, "grad_norm": 3.709637470700691, "kl": 0.02130126953125, "learning_rate": 1.9855399489051588e-07, "loss": -0.0397, "num_tokens": 176769742.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0541481971740723, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.010529446175823728, "rewards/wordcountpos_reward/raw_geo/std": 0.029111124537196782, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.16187558093703852, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1197.0, "completions/max_terminated_length": 1197.0, "completions/mean_length": 1039.4375, "completions/mean_terminated_length": 1039.4375, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.8071614322864573, "frac_reward_zero_std": 0.0, "grad_norm": 3.600525059911751, "kl": 0.0191650390625, "learning_rate": 1.983578312834822e-07, "loss": -0.0128, "num_tokens": 176816133.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7570257782936096, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008365972241455733, "rewards/wordcountpos_reward/raw_geo/std": 0.1522965579968121, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.107496769977314, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1473.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1301.8125, "completions/mean_terminated_length": 1301.8125, "completions/min_length": 1143.0, "completions/min_terminated_length": 1143.0, "epoch": 0.8073614722944589, "frac_reward_zero_std": 0.0, "grad_norm": 2.6049641829836934, "kl": 0.01336669921875, "learning_rate": 1.9816183913876665e-07, "loss": -0.0126, "num_tokens": 176866306.0, "reward": -3.725290298461914e-08, "reward_std": 1.0533214807510376, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2396783090342164, "rewards/wordcountpos_reward/raw_geo/std": 0.09014405832568573, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1242.5625, "completions/mean_terminated_length": 1205.7857666015625, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.8075615123024605, "frac_reward_zero_std": 0.0, "grad_norm": 3.207026677787394, "kl": 0.0179901123046875, "learning_rate": 1.97966018551936e-07, "loss": 0.0016, "num_tokens": 176924771.0, "reward": 0.0, "reward_std": 0.6860706210136414, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11490269630988133, "rewards/wordcountpos_reward/raw_geo/std": 0.14617738160001645, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1257.0625, "completions/mean_terminated_length": 1240.86669921875, "completions/min_length": 1092.0, "completions/min_terminated_length": 1092.0, "epoch": 0.8077615523104621, "frac_reward_zero_std": 0.0, "grad_norm": 3.2487139760348986, "kl": 0.01922607421875, "learning_rate": 1.9777036961847327e-07, "loss": -0.0078, "num_tokens": 176974860.0, "reward": 0.0, "reward_std": 0.8382153511047363, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0777196024547201, "rewards/wordcountpos_reward/raw_geo/std": 0.10058985117829537, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1087.5625, "completions/mean_terminated_length": 1087.5625, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.8079615923184637, "frac_reward_zero_std": 0.0, "grad_norm": 3.4991941621088216, "kl": 0.018341064453125, "learning_rate": 1.975748924337778e-07, "loss": -0.0163, "num_tokens": 177021517.0, "reward": 0.0, "reward_std": 0.9901320934295654, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015032781432721519, "rewards/wordcountpos_reward/raw_geo/std": 0.1358228304839686, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 972.9375, "completions/mean_terminated_length": 972.9375, "completions/min_length": 635.0, "completions/min_terminated_length": 635.0, "epoch": 0.8081616323264653, "frac_reward_zero_std": 0.0, "grad_norm": 3.3817288039606646, "kl": 0.018646240234375, "learning_rate": 1.9737958709316494e-07, "loss": -0.0266, "num_tokens": 177059044.0, "reward": 7.450580596923828e-09, "reward_std": 0.9384283423423767, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0015247172942337486, "rewards/wordcountpos_reward/raw_geo/std": 0.08035859024827584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.6375000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.2817603394533888, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1230.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 1130.0, "completions/mean_terminated_length": 1130.0, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.8083616723344669, "frac_reward_zero_std": 0.0, "grad_norm": 2.969576147783652, "kl": 0.0123138427734375, "learning_rate": 1.9718445369186686e-07, "loss": 0.008, "num_tokens": 177098940.0, "reward": 0.0, "reward_std": 0.7612095475196838, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11145143826129644, "rewards/wordcountpos_reward/raw_geo/std": 0.13821187700457052, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869927, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1259.75, "completions/mean_terminated_length": 1225.4285888671875, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.8085617123424685, "frac_reward_zero_std": 0.0, "grad_norm": 3.0540384775814458, "kl": 0.0169677734375, "learning_rate": 1.9698949232503113e-07, "loss": 0.0092, "num_tokens": 177151024.0, "reward": 0.0, "reward_std": 1.0266233682632446, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.024211886482410717, "rewards/wordcountpos_reward/raw_geo/std": 0.2555059750366427, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1220.0, "completions/max_terminated_length": 1220.0, "completions/mean_length": 1000.3125, "completions/mean_terminated_length": 1000.3125, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.8087617523504701, "frac_reward_zero_std": 0.0, "grad_norm": 2.249089275896, "kl": 0.0117034912109375, "learning_rate": 1.9679470308772193e-07, "loss": -0.0359, "num_tokens": 177186453.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5029071569442749, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07298882054989499, "rewards/wordcountpos_reward/raw_geo/std": 0.16729631357193667, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1043.75, "completions/mean_terminated_length": 1043.75, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.8089617923584717, "frac_reward_zero_std": 0.0, "grad_norm": 3.635821861215797, "kl": 0.020050048828125, "learning_rate": 1.966000860749194e-07, "loss": -0.0303, "num_tokens": 177222297.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0283204317092896, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05725316710758451, "rewards/wordcountpos_reward/raw_geo/std": 0.1755901378636492, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1328.0, "completions/mean_length": 1093.4375, "completions/mean_terminated_length": 1066.3333740234375, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.8091618323664733, "frac_reward_zero_std": 0.0, "grad_norm": 3.630729473993342, "kl": 0.0195770263671875, "learning_rate": 1.9640564138151972e-07, "loss": -0.049, "num_tokens": 177269544.0, "reward": 0.0, "reward_std": 0.713876485824585, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09949128811338928, "rewards/wordcountpos_reward/raw_geo/std": 0.19169800768103043, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1273.0, "completions/max_terminated_length": 1273.0, "completions/mean_length": 1007.0, "completions/mean_terminated_length": 1007.0, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.8093618723744749, "frac_reward_zero_std": 0.0, "grad_norm": 2.941528834727588, "kl": 0.015960693359375, "learning_rate": 1.9621136910233484e-07, "loss": -0.0266, "num_tokens": 177304800.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0271155834197998, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18623406129795517, "rewards/wordcountpos_reward/raw_geo/std": 0.16710808880603345, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 1009.875, "completions/mean_terminated_length": 1009.875, "completions/min_length": 741.0, "completions/min_terminated_length": 741.0, "epoch": 0.8095619123824765, "frac_reward_zero_std": 0.0, "grad_norm": 2.6991560140512783, "kl": 0.0138092041015625, "learning_rate": 1.9601726933209295e-07, "loss": -0.0554, "num_tokens": 177351214.0, "reward": -3.725290298461914e-09, "reward_std": 1.0272059440612793, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.15702528556735446, "rewards/wordcountpos_reward/raw_geo/std": 0.07051529142404739, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1177.3125, "completions/mean_terminated_length": 1155.800048828125, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.8097619523904781, "frac_reward_zero_std": 0.0, "grad_norm": 2.802499519432688, "kl": 0.0133514404296875, "learning_rate": 1.9582334216543789e-07, "loss": -0.0395, "num_tokens": 177402739.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8959676623344421, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00034420695449875966, "rewards/wordcountpos_reward/raw_geo/std": 0.05534415963486704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14446581038560777, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1071.0625, "completions/mean_terminated_length": 1071.0625, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.8099619923984797, "frac_reward_zero_std": 0.0, "grad_norm": 2.5842232635016, "kl": 0.0128326416015625, "learning_rate": 1.956295876969296e-07, "loss": -0.0448, "num_tokens": 177432092.0, "reward": 0.0, "reward_std": 0.6665298938751221, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05433895238769417, "rewards/wordcountpos_reward/raw_geo/std": 0.21344177504376693, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1131.4375, "completions/mean_terminated_length": 1131.4375, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.8101620324064813, "frac_reward_zero_std": 0.0, "grad_norm": 3.2437988052715507, "kl": 0.0136566162109375, "learning_rate": 1.9543600602104337e-07, "loss": -0.0334, "num_tokens": 177476363.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0476105213165283, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03254527436083805, "rewards/wordcountpos_reward/raw_geo/std": 0.053466126974999736, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4050 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1033.125, "completions/mean_terminated_length": 1033.125, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.8103620724144829, "frac_reward_zero_std": 0.0, "grad_norm": 3.515710316104769, "kl": 0.0184173583984375, "learning_rate": 1.9524259723217074e-07, "loss": -0.0254, "num_tokens": 177508917.0, "reward": 0.0, "reward_std": 0.676999032497406, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006089576558515807, "rewards/wordcountpos_reward/raw_geo/std": 0.109366520214591, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 1002.8125, "completions/mean_terminated_length": 1002.8125, "completions/min_length": 750.0, "completions/min_terminated_length": 750.0, "epoch": 0.8105621124224845, "frac_reward_zero_std": 0.0, "grad_norm": 2.9828313106236544, "kl": 0.01306915283203125, "learning_rate": 1.9504936142461852e-07, "loss": 0.0246, "num_tokens": 177546922.0, "reward": 0.0, "reward_std": 0.7886447906494141, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.18860708570898296, "rewards/wordcountpos_reward/raw_geo/std": 0.13868690827673147, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1183.5, "completions/mean_terminated_length": 1110.4615478515625, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.8107621524304861, "frac_reward_zero_std": 0.0, "grad_norm": 3.4479218485918404, "kl": 0.0186309814453125, "learning_rate": 1.948562986926096e-07, "loss": 0.007, "num_tokens": 177587386.0, "reward": -1.4901161193847656e-08, "reward_std": 1.049615502357483, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13909944330784438, "rewards/wordcountpos_reward/raw_geo/std": 0.147931017393785, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1219.6875, "completions/mean_terminated_length": 1179.6429443359375, "completions/min_length": 993.0, "completions/min_terminated_length": 993.0, "epoch": 0.8109621924384877, "frac_reward_zero_std": 0.0, "grad_norm": 2.6408033653024936, "kl": 0.01580810546875, "learning_rate": 1.94663409130282e-07, "loss": 0.0518, "num_tokens": 177627821.0, "reward": 0.0, "reward_std": 0.8761932849884033, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.039921087915585685, "rewards/wordcountpos_reward/raw_geo/std": 0.06445564801330772, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1264.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 1110.8125, "completions/mean_terminated_length": 1110.8125, "completions/min_length": 668.0, "completions/min_terminated_length": 668.0, "epoch": 0.8111622324464893, "frac_reward_zero_std": 0.0, "grad_norm": 3.0696095623790303, "kl": 0.0162200927734375, "learning_rate": 1.944706928316897e-07, "loss": -0.0089, "num_tokens": 177671778.0, "reward": 2.9802322387695312e-08, "reward_std": 0.855747640132904, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09152393508774853, "rewards/wordcountpos_reward/raw_geo/std": 0.09618979329328554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1124.8125, "completions/mean_terminated_length": 1099.800048828125, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.8113622724544909, "frac_reward_zero_std": 0.0, "grad_norm": 3.5872398347973906, "kl": 0.021148681640625, "learning_rate": 1.9427814989080194e-07, "loss": -0.0532, "num_tokens": 177705623.0, "reward": 0.0, "reward_std": 0.8143057823181152, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.020791795699136414, "rewards/wordcountpos_reward/raw_geo/std": 0.04219480389474686, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 1128.6875, "completions/mean_terminated_length": 1103.933349609375, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.8115623124624926, "frac_reward_zero_std": 0.0, "grad_norm": 3.5537167959745095, "kl": 0.021270751953125, "learning_rate": 1.9408578040150379e-07, "loss": -0.0193, "num_tokens": 177751842.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9750949740409851, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06788644081732087, "rewards/wordcountpos_reward/raw_geo/std": 0.0953330466354825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 1144.1875, "completions/mean_terminated_length": 1144.1875, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.8117623524704941, "frac_reward_zero_std": 0.0, "grad_norm": 3.1507869833735613, "kl": 0.020751953125, "learning_rate": 1.9389358445759497e-07, "loss": -0.0244, "num_tokens": 177799005.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5411153435707092, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05935052024760615, "rewards/wordcountpos_reward/raw_geo/std": 0.1005632326018373, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.131021626713557, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1040.6875, "completions/mean_terminated_length": 1040.6875, "completions/min_length": 772.0, "completions/min_terminated_length": 772.0, "epoch": 0.8119623924784957, "frac_reward_zero_std": 0.0, "grad_norm": 3.700591822896545, "kl": 0.031829833984375, "learning_rate": 1.9370156215279166e-07, "loss": -0.0132, "num_tokens": 177843944.0, "reward": 3.725290298461914e-09, "reward_std": 0.9997601509094238, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.2579659422106824, "rewards/wordcountpos_reward/raw_geo/std": 0.15707719669342982, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1093.1875, "completions/mean_terminated_length": 1066.0667724609375, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.8121624324864973, "frac_reward_zero_std": 0.0, "grad_norm": 3.6467131587821418, "kl": 0.020782470703125, "learning_rate": 1.935097135807245e-07, "loss": -0.0038, "num_tokens": 177876187.0, "reward": 7.450580596923828e-09, "reward_std": 1.0406489372253418, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1351734763786182, "rewards/wordcountpos_reward/raw_geo/std": 0.13853117092028694, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1345.625, "completions/mean_terminated_length": 1310.0, "completions/min_length": 1183.0, "completions/min_terminated_length": 1183.0, "epoch": 0.8123624724944989, "frac_reward_zero_std": 0.0, "grad_norm": 3.0171229347373205, "kl": 0.0169219970703125, "learning_rate": 1.9331803883493985e-07, "loss": 0.0194, "num_tokens": 177929077.0, "reward": 0.0, "reward_std": 0.6655166149139404, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02561856816074707, "rewards/wordcountpos_reward/raw_geo/std": 0.0670581711421461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1121.0, "completions/max_terminated_length": 1121.0, "completions/mean_length": 908.25, "completions/mean_terminated_length": 908.25, "completions/min_length": 601.0, "completions/min_terminated_length": 601.0, "epoch": 0.8125625125025004, "frac_reward_zero_std": 0.0, "grad_norm": 3.581372534114602, "kl": 0.015472412109375, "learning_rate": 1.931265380088989e-07, "loss": -0.0767, "num_tokens": 177969505.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6583057641983032, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022679980375295825, "rewards/wordcountpos_reward/raw_geo/std": 0.0837356718542811, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1194.0625, "completions/mean_terminated_length": 1194.0625, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.8127625525105021, "frac_reward_zero_std": 0.0, "grad_norm": 2.769854401247188, "kl": 0.017547607421875, "learning_rate": 1.9293521119597878e-07, "loss": -0.0159, "num_tokens": 178012786.0, "reward": 0.0, "reward_std": 0.772948145866394, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23500520463341795, "rewards/wordcountpos_reward/raw_geo/std": 0.14441782489863025, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1317.375, "completions/mean_terminated_length": 1234.3636474609375, "completions/min_length": 1093.0, "completions/min_terminated_length": 1093.0, "epoch": 0.8129625925185037, "frac_reward_zero_std": 0.0, "grad_norm": 3.2589599021345275, "kl": 0.0191802978515625, "learning_rate": 1.92744058489471e-07, "loss": -0.0274, "num_tokens": 178061920.0, "reward": 1.4901161193847656e-08, "reward_std": 0.960946798324585, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07662720356384563, "rewards/wordcountpos_reward/raw_geo/std": 0.08465956043801566, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1064.9375, "completions/mean_terminated_length": 1064.9375, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.8131626325265053, "frac_reward_zero_std": 0.0, "grad_norm": 3.021213228352489, "kl": 0.014495849609375, "learning_rate": 1.9255307998258253e-07, "loss": -0.0764, "num_tokens": 178112599.0, "reward": -2.9802322387695312e-08, "reward_std": 0.43809396028518677, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07465428598690069, "rewards/wordcountpos_reward/raw_geo/std": 0.08537865619594606, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1236.0, "completions/mean_terminated_length": 1236.0, "completions/min_length": 1073.0, "completions/min_terminated_length": 1073.0, "epoch": 0.8133626725345069, "frac_reward_zero_std": 0.0, "grad_norm": 2.6667088086588917, "kl": 0.0121307373046875, "learning_rate": 1.9236227576843533e-07, "loss": -0.0048, "num_tokens": 178157343.0, "reward": 0.0, "reward_std": 0.9625179767608643, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09874671827610457, "rewards/wordcountpos_reward/raw_geo/std": 0.2445258835149296, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1394.4375, "completions/mean_terminated_length": 1288.875, "completions/min_length": 1094.0, "completions/min_terminated_length": 1094.0, "epoch": 0.8135627125425084, "frac_reward_zero_std": 0.0, "grad_norm": 3.25875985841581, "kl": 0.017669677734375, "learning_rate": 1.921716459400667e-07, "loss": -0.0355, "num_tokens": 178206230.0, "reward": -4.470348358154297e-08, "reward_std": 0.9916976690292358, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.16879987665131074, "rewards/wordcountpos_reward/raw_geo/std": 0.12217959734505926, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1072.3125, "completions/mean_terminated_length": 1043.800048828125, "completions/min_length": 855.0, "completions/min_terminated_length": 855.0, "epoch": 0.8137627525505101, "frac_reward_zero_std": 0.0, "grad_norm": 2.9621199472757076, "kl": 0.0129547119140625, "learning_rate": 1.919811905904282e-07, "loss": 0.0624, "num_tokens": 178237139.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5716201663017273, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1315496627264715, "rewards/wordcountpos_reward/raw_geo/std": 0.12444734487917687, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1119.125, "completions/mean_terminated_length": 1064.71435546875, "completions/min_length": 705.0, "completions/min_terminated_length": 705.0, "epoch": 0.8139627925585117, "frac_reward_zero_std": 0.0, "grad_norm": 3.5178278948340123, "kl": 0.021575927734375, "learning_rate": 1.9179090981238688e-07, "loss": -0.1014, "num_tokens": 178290781.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7580597996711731, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0409835857030271, "rewards/wordcountpos_reward/raw_geo/std": 0.05541625173079122, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1245.4375, "completions/mean_terminated_length": 1209.071533203125, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.8141628325665133, "frac_reward_zero_std": 0.0, "grad_norm": 2.8438747668147824, "kl": 0.014373779296875, "learning_rate": 1.9160080369872457e-07, "loss": 0.0186, "num_tokens": 178331164.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5147362947463989, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07067489309217384, "rewards/wordcountpos_reward/raw_geo/std": 0.23987144127715285, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4070 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1132.6875, "completions/mean_terminated_length": 1132.6875, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.8143628725745149, "frac_reward_zero_std": 0.0, "grad_norm": 3.106436953659906, "kl": 0.01611328125, "learning_rate": 1.914108723421379e-07, "loss": 0.0009, "num_tokens": 178364271.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0481936931610107, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05691301425078468, "rewards/wordcountpos_reward/raw_geo/std": 0.08275026445261222, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1185.0, "completions/max_terminated_length": 1185.0, "completions/mean_length": 1087.125, "completions/mean_terminated_length": 1087.125, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.8145629125825165, "frac_reward_zero_std": 0.0, "grad_norm": 3.0739076076510385, "kl": 0.01947021484375, "learning_rate": 1.9122111583523815e-07, "loss": 0.0058, "num_tokens": 178411761.0, "reward": 0.0, "reward_std": 0.661535918712616, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.008366005181204518, "rewards/wordcountpos_reward/raw_geo/std": 0.08812782380096677, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1140.3125, "completions/mean_terminated_length": 1057.3077392578125, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.8147629525905181, "frac_reward_zero_std": 0.0, "grad_norm": 3.0731011183261367, "kl": 0.017425537109375, "learning_rate": 1.9103153427055145e-07, "loss": -0.0663, "num_tokens": 178453070.0, "reward": 7.450580596923828e-09, "reward_std": 1.0611809492111206, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.11127579515256335, "rewards/wordcountpos_reward/raw_geo/std": 0.06915284158946933, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1177.125, "completions/mean_terminated_length": 1155.60009765625, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.8149629925985197, "frac_reward_zero_std": 0.0, "grad_norm": 3.104655509398908, "kl": 0.0161590576171875, "learning_rate": 1.9084212774051865e-07, "loss": -0.0124, "num_tokens": 178500936.0, "reward": -7.450580596923828e-09, "reward_std": 1.0540945529937744, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1596846577534139, "rewards/wordcountpos_reward/raw_geo/std": 0.07953174471254885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185827, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1453.8125, "completions/mean_terminated_length": 1376.8333740234375, "completions/min_length": 1251.0, "completions/min_terminated_length": 1251.0, "epoch": 0.8151630326065213, "frac_reward_zero_std": 0.0, "grad_norm": 2.6816286598017234, "kl": 0.01416015625, "learning_rate": 1.9065289633749543e-07, "loss": 0.0007, "num_tokens": 178557581.0, "reward": 0.0, "reward_std": 0.5697770118713379, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07289524695974023, "rewards/wordcountpos_reward/raw_geo/std": 0.21143542265712775, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 922.1875, "completions/mean_terminated_length": 922.1875, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.8153630726145229, "frac_reward_zero_std": 0.0, "grad_norm": 3.977279685614983, "kl": 0.024078369140625, "learning_rate": 1.9046384015375157e-07, "loss": 0.1111, "num_tokens": 178608576.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7995089292526245, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11100929109770523, "rewards/wordcountpos_reward/raw_geo/std": 0.08726846552973762, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1079.8125, "completions/mean_terminated_length": 1079.8125, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.8155631126225245, "frac_reward_zero_std": 0.0, "grad_norm": 2.9151833383808166, "kl": 0.02191162109375, "learning_rate": 1.9027495928147192e-07, "loss": 0.034, "num_tokens": 178648789.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5828340649604797, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027725859519019957, "rewards/wordcountpos_reward/raw_geo/std": 0.03299641973292629, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4077 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1432.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1096.9375, "completions/mean_terminated_length": 1096.9375, "completions/min_length": 779.0, "completions/min_terminated_length": 779.0, "epoch": 0.8157631526305261, "frac_reward_zero_std": 0.0, "grad_norm": 2.9479009124913484, "kl": 0.0167999267578125, "learning_rate": 1.9008625381275555e-07, "loss": 0.0086, "num_tokens": 178686548.0, "reward": 0.0, "reward_std": 0.7251713275909424, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08491699895032764, "rewards/wordcountpos_reward/raw_geo/std": 0.09946992741762645, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1256.75, "completions/mean_terminated_length": 1240.533447265625, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.8159631926385277, "frac_reward_zero_std": 0.0, "grad_norm": 3.1847409932795014, "kl": 0.0183868408203125, "learning_rate": 1.898977238396163e-07, "loss": -0.0349, "num_tokens": 178731856.0, "reward": -7.450580596923828e-09, "reward_std": 1.032332420349121, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.08982549883354181, "rewards/wordcountpos_reward/raw_geo/std": 0.1694151233267355, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1165.1875, "completions/mean_terminated_length": 1142.86669921875, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.8161632326465293, "frac_reward_zero_std": 0.0, "grad_norm": 2.6369782362580696, "kl": 0.01165771484375, "learning_rate": 1.8970936945398198e-07, "loss": -0.0221, "num_tokens": 178770451.0, "reward": 0.0, "reward_std": 0.8963133096694946, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3136404680615377, "rewards/wordcountpos_reward/raw_geo/std": 0.23778939226526524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1156.9375, "completions/mean_terminated_length": 1156.9375, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.8163632726545309, "frac_reward_zero_std": 0.0, "grad_norm": 3.317857868914168, "kl": 0.0185546875, "learning_rate": 1.8952119074769511e-07, "loss": -0.0358, "num_tokens": 178815402.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0662423372268677, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06636736994369533, "rewards/wordcountpos_reward/raw_geo/std": 0.05662386177121081, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08062257748298551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1252.9375, "completions/mean_terminated_length": 1170.5833740234375, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.8165633126625325, "frac_reward_zero_std": 0.0, "grad_norm": 2.524350744441057, "kl": 0.012237548828125, "learning_rate": 1.8933318781251274e-07, "loss": -0.0196, "num_tokens": 178861313.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7120239734649658, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09452898118431971, "rewards/wordcountpos_reward/raw_geo/std": 0.06433075592474337, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.15049301694147857, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1209.5625, "completions/mean_terminated_length": 1209.5625, "completions/min_length": 1048.0, "completions/min_terminated_length": 1048.0, "epoch": 0.8167633526705341, "frac_reward_zero_std": 0.0, "grad_norm": 3.058979920721431, "kl": 0.0126190185546875, "learning_rate": 1.8914536074010557e-07, "loss": -0.0179, "num_tokens": 178906874.0, "reward": 0.0, "reward_std": 0.6456544399261475, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08570228332430611, "rewards/wordcountpos_reward/raw_geo/std": 0.13028636439217234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1194.4375, "completions/mean_terminated_length": 1174.0667724609375, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.8169633926785357, "frac_reward_zero_std": 0.0, "grad_norm": 3.5547718680138605, "kl": 0.018310546875, "learning_rate": 1.8895770962205908e-07, "loss": -0.0464, "num_tokens": 178952817.0, "reward": 7.450580596923828e-09, "reward_std": 1.0489003658294678, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.20026384024374028, "rewards/wordcountpos_reward/raw_geo/std": 0.17346069446960255, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 920.375, "completions/mean_terminated_length": 920.375, "completions/min_length": 573.0, "completions/min_terminated_length": 573.0, "epoch": 0.8171634326865373, "frac_reward_zero_std": 0.0, "grad_norm": 3.442932731422679, "kl": 0.01605224609375, "learning_rate": 1.887702345498729e-07, "loss": 0.0099, "num_tokens": 178978799.0, "reward": 0.0, "reward_std": 1.0525271892547607, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11109403949540021, "rewards/wordcountpos_reward/raw_geo/std": 0.07367203150884799, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362769, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1189.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 949.5625, "completions/mean_terminated_length": 949.5625, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.8173634726945389, "frac_reward_zero_std": 0.0, "grad_norm": 3.70987785529453, "kl": 0.0173187255859375, "learning_rate": 1.8858293561496081e-07, "loss": -0.0148, "num_tokens": 179011808.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5078814029693604, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22798032857360498, "rewards/wordcountpos_reward/raw_geo/std": 0.26521267786554403, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1130.375, "completions/mean_terminated_length": 1130.375, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.8175635127025405, "frac_reward_zero_std": 0.0, "grad_norm": 3.230274087225994, "kl": 0.0166015625, "learning_rate": 1.8839581290865014e-07, "loss": -0.0275, "num_tokens": 179057742.0, "reward": 7.450580596923828e-09, "reward_std": 1.0633457899093628, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.14516939255136943, "rewards/wordcountpos_reward/raw_geo/std": 0.09651583590936146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1223.9375, "completions/mean_terminated_length": 1223.9375, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.8177635527105421, "frac_reward_zero_std": 0.0, "grad_norm": 3.046848536500751, "kl": 0.019561767578125, "learning_rate": 1.8820886652218338e-07, "loss": -0.0089, "num_tokens": 179105069.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0671050548553467, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.26835532740511264, "rewards/wordcountpos_reward/raw_geo/std": 0.10870321639364197, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 1167.4375, "completions/mean_terminated_length": 1056.5833740234375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.8179635927185437, "frac_reward_zero_std": 0.0, "grad_norm": 3.173237509189384, "kl": 0.0178680419921875, "learning_rate": 1.8802209654671603e-07, "loss": -0.0032, "num_tokens": 179152372.0, "reward": 0.0, "reward_std": 0.658298909664154, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.015274334893030886, "rewards/wordcountpos_reward/raw_geo/std": 0.07117309467847004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.15104573749303493, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 1083.9375, "completions/mean_terminated_length": 1083.9375, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.8181636327265454, "frac_reward_zero_std": 0.0, "grad_norm": 2.9208998691447325, "kl": 0.0164337158203125, "learning_rate": 1.878355030733183e-07, "loss": 0.0121, "num_tokens": 179198779.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8580343723297119, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.055334883987571304, "rewards/wordcountpos_reward/raw_geo/std": 0.22229655687850658, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14580555290954889, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4090 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1171.5, "completions/mean_terminated_length": 1171.5, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.8183636727345469, "frac_reward_zero_std": 0.0, "grad_norm": 3.2759076121533415, "kl": 0.018218994140625, "learning_rate": 1.8764908619297354e-07, "loss": -0.0233, "num_tokens": 179233987.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0378913879394531, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06324172888148707, "rewards/wordcountpos_reward/raw_geo/std": 0.1497263263883516, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1346.8125, "completions/mean_terminated_length": 1277.181884765625, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.8185637127425485, "frac_reward_zero_std": 0.0, "grad_norm": 3.341388799958416, "kl": 0.02008056640625, "learning_rate": 1.8746284599657996e-07, "loss": -0.0277, "num_tokens": 179287128.0, "reward": 0.0, "reward_std": 1.063018798828125, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19050250179531641, "rewards/wordcountpos_reward/raw_geo/std": 0.15695053147294089, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 1118.9375, "completions/mean_terminated_length": 1118.9375, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.8187637527505501, "frac_reward_zero_std": 0.0, "grad_norm": 3.5766796191614296, "kl": 0.0206298828125, "learning_rate": 1.8727678257494883e-07, "loss": 0.0142, "num_tokens": 179323135.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5955753326416016, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08285803562969998, "rewards/wordcountpos_reward/raw_geo/std": 0.28940159229803797, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 1296.4375, "completions/mean_terminated_length": 1228.5833740234375, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.8189637927585517, "frac_reward_zero_std": 0.0, "grad_norm": 3.370740994186922, "kl": 0.02264404296875, "learning_rate": 1.8709089601880562e-07, "loss": -0.0229, "num_tokens": 179375438.0, "reward": -5.960464477539063e-08, "reward_std": 0.6547837257385254, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08972057017072516, "rewards/wordcountpos_reward/raw_geo/std": 0.10946701932599691, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1031.375, "completions/mean_terminated_length": 1031.375, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.8191638327665534, "frac_reward_zero_std": 0.0, "grad_norm": 3.674323436482364, "kl": 0.020843505859375, "learning_rate": 1.8690518641878945e-07, "loss": -0.0424, "num_tokens": 179415220.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0158275365829468, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2868578978248641, "rewards/wordcountpos_reward/raw_geo/std": 0.1359282848599163, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09583937179043478, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1213.5, "completions/mean_terminated_length": 1172.571533203125, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.8193638727745549, "frac_reward_zero_std": 0.0, "grad_norm": 2.7537983929957597, "kl": 0.0135650634765625, "learning_rate": 1.8671965386545325e-07, "loss": -0.0197, "num_tokens": 179457708.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8955031037330627, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054513910120314896, "rewards/wordcountpos_reward/raw_geo/std": 0.14443409575700666, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1149.4375, "completions/mean_terminated_length": 1149.4375, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.8195639127825565, "frac_reward_zero_std": 0.0, "grad_norm": 2.651592792486267, "kl": 0.0169525146484375, "learning_rate": 1.8653429844926333e-07, "loss": -0.0267, "num_tokens": 179504331.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9166392683982849, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10573554743032593, "rewards/wordcountpos_reward/raw_geo/std": 0.14612613325695475, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1186.5625, "completions/mean_terminated_length": 1044.0909423828125, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.8197639527905581, "frac_reward_zero_std": 0.0, "grad_norm": 3.612885184842456, "kl": 0.018524169921875, "learning_rate": 1.8634912026059996e-07, "loss": 0.0325, "num_tokens": 179552188.0, "reward": -7.450580596923828e-09, "reward_std": 1.0022618770599365, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06609660491988383, "rewards/wordcountpos_reward/raw_geo/std": 0.1343590278041507, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.131021626713557, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1134.375, "completions/mean_terminated_length": 1110.0, "completions/min_length": 935.0, "completions/min_terminated_length": 935.0, "epoch": 0.8199639927985597, "frac_reward_zero_std": 0.0, "grad_norm": 3.5474824526516877, "kl": 0.020050048828125, "learning_rate": 1.8616411938975684e-07, "loss": -0.0393, "num_tokens": 179593082.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9250596165657043, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17360750917960535, "rewards/wordcountpos_reward/raw_geo/std": 0.168620448686308, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 1067.0, "completions/mean_terminated_length": 1067.0, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.8201640328065614, "frac_reward_zero_std": 0.0, "grad_norm": 3.358769740196856, "kl": 0.0171051025390625, "learning_rate": 1.859792959269414e-07, "loss": 0.0088, "num_tokens": 179636202.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0385116338729858, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1250714656128039, "rewards/wordcountpos_reward/raw_geo/std": 0.23479827072794124, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1268.875, "completions/mean_terminated_length": 1235.857177734375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.8203640728145629, "frac_reward_zero_std": 0.0, "grad_norm": 3.052527834556391, "kl": 0.0153350830078125, "learning_rate": 1.857946499622741e-07, "loss": -0.0227, "num_tokens": 179676920.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8186002969741821, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.060611542547964456, "rewards/wordcountpos_reward/raw_geo/std": 0.19609945893875724, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1080980350662545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1304.0, "completions/mean_terminated_length": 1258.769287109375, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.8205641128225645, "frac_reward_zero_std": 0.0, "grad_norm": 2.718883034504098, "kl": 0.0168914794921875, "learning_rate": 1.8561018158578927e-07, "loss": -0.0507, "num_tokens": 179726072.0, "reward": -2.9802322387695312e-08, "reward_std": 0.41218942403793335, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07218278888758954, "rewards/wordcountpos_reward/raw_geo/std": 0.07421272247724976, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1084.75, "completions/mean_terminated_length": 1057.0667724609375, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.8207641528305661, "frac_reward_zero_std": 0.0, "grad_norm": 2.834763799968015, "kl": 0.016448974609375, "learning_rate": 1.8542589088743465e-07, "loss": 0.0049, "num_tokens": 179765556.0, "reward": -5.960464477539063e-08, "reward_std": 0.7741872072219849, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.021195151954834325, "rewards/wordcountpos_reward/raw_geo/std": 0.06404925232804301, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1358.5625, "completions/mean_terminated_length": 1248.5555419921875, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.8209641928385677, "frac_reward_zero_std": 0.0, "grad_norm": 2.7011020024203414, "kl": 0.0146636962890625, "learning_rate": 1.8524177795707118e-07, "loss": -0.0046, "num_tokens": 179818861.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9666626453399658, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01822059776371872, "rewards/wordcountpos_reward/raw_geo/std": 0.1420110696530425, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027816, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1231.125, "completions/mean_terminated_length": 1213.2000732421875, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.8211642328465694, "frac_reward_zero_std": 0.0, "grad_norm": 2.881450914876157, "kl": 0.015411376953125, "learning_rate": 1.8505784288447305e-07, "loss": -0.0278, "num_tokens": 179865799.0, "reward": 0.0, "reward_std": 0.8492058515548706, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.055080953801470364, "rewards/wordcountpos_reward/raw_geo/std": 0.05501713539224772, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222519, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1303.875, "completions/mean_terminated_length": 1275.857177734375, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.8213642728545709, "frac_reward_zero_std": 0.0, "grad_norm": 2.124407467311589, "kl": 0.00920867919921875, "learning_rate": 1.8487408575932786e-07, "loss": -0.0119, "num_tokens": 179905645.0, "reward": -4.470348358154297e-08, "reward_std": 0.9383877515792847, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026757347920091545, "rewards/wordcountpos_reward/raw_geo/std": 0.20011300923334616, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1379.6875, "completions/mean_terminated_length": 1286.111083984375, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.8215643128625725, "frac_reward_zero_std": 0.0, "grad_norm": 2.7671397427292326, "kl": 0.0138702392578125, "learning_rate": 1.8469050667123642e-07, "loss": -0.0055, "num_tokens": 179960096.0, "reward": 0.0, "reward_std": 1.0687329769134521, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08998809679413931, "rewards/wordcountpos_reward/raw_geo/std": 0.10819903794026302, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1122.0, "completions/mean_terminated_length": 1122.0, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.8217643528705741, "frac_reward_zero_std": 0.0, "grad_norm": 3.302076645466939, "kl": 0.02056884765625, "learning_rate": 1.8450710570971295e-07, "loss": 0.0182, "num_tokens": 180011016.0, "reward": 4.470348358154297e-08, "reward_std": 0.9939025640487671, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026823249535090663, "rewards/wordcountpos_reward/raw_geo/std": 0.06414103882792967, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1459.6875, "completions/mean_terminated_length": 1392.5, "completions/min_length": 1095.0, "completions/min_terminated_length": 1095.0, "epoch": 0.8219643928785757, "frac_reward_zero_std": 0.0, "grad_norm": 3.134779542891989, "kl": 0.017608642578125, "learning_rate": 1.8432388296418411e-07, "loss": -0.0091, "num_tokens": 180062011.0, "reward": 0.0, "reward_std": 0.715171217918396, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.022743008025446175, "rewards/wordcountpos_reward/raw_geo/std": 0.24479030429602983, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1185.1875, "completions/mean_terminated_length": 1185.1875, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.8221644328865774, "frac_reward_zero_std": 0.0, "grad_norm": 2.495486890373293, "kl": 0.0149383544921875, "learning_rate": 1.841408385239907e-07, "loss": -0.039, "num_tokens": 180113734.0, "reward": -8.940696716308594e-08, "reward_std": 0.794654369354248, "rewards/wordcountpos_reward/mean": -8.940696716308594e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05947495958337806, "rewards/wordcountpos_reward/raw_geo/std": 0.1482751665965536, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.094182643679026, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1064.0, "completions/max_terminated_length": 1064.0, "completions/mean_length": 979.25, "completions/mean_terminated_length": 979.25, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.8223644728945789, "frac_reward_zero_std": 0.0, "grad_norm": 3.504491625780587, "kl": 0.018096923828125, "learning_rate": 1.8395797247838562e-07, "loss": -0.0123, "num_tokens": 180156778.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6012568473815918, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0049531189957170724, "rewards/wordcountpos_reward/raw_geo/std": 0.10563699676347582, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1228.4375, "completions/mean_terminated_length": 1210.3333740234375, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.8225645129025805, "frac_reward_zero_std": 0.0, "grad_norm": 2.6684396663853374, "kl": 0.0164337158203125, "learning_rate": 1.8377528491653548e-07, "loss": 0.0117, "num_tokens": 180209465.0, "reward": 1.4901161193847656e-08, "reward_std": 0.948627233505249, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15007236289489778, "rewards/wordcountpos_reward/raw_geo/std": 0.09069295960322911, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1199.375, "completions/mean_terminated_length": 1130.0, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.8227645529105821, "frac_reward_zero_std": 0.0, "grad_norm": 3.3430871426509294, "kl": 0.0159759521484375, "learning_rate": 1.8359277592751916e-07, "loss": -0.0141, "num_tokens": 180262199.0, "reward": 3.725290298461914e-09, "reward_std": 0.929730236530304, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.08172706376828776, "rewards/wordcountpos_reward/raw_geo/std": 0.10255644475387997, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1463.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1208.6875, "completions/mean_terminated_length": 1208.6875, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.8229645929185837, "frac_reward_zero_std": 0.0, "grad_norm": 3.578723419232635, "kl": 0.021331787109375, "learning_rate": 1.834104456003293e-07, "loss": -0.0266, "num_tokens": 180305714.0, "reward": 0.0, "reward_std": 0.9297418594360352, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.20747990921253406, "rewards/wordcountpos_reward/raw_geo/std": 0.08787232263183817, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1161.9375, "completions/mean_terminated_length": 1139.4000244140625, "completions/min_length": 1042.0, "completions/min_terminated_length": 1042.0, "epoch": 0.8231646329265854, "frac_reward_zero_std": 0.0, "grad_norm": 2.9775423674728403, "kl": 0.0160064697265625, "learning_rate": 1.8322829402387075e-07, "loss": -0.0055, "num_tokens": 180353905.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9781287908554077, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0639246554098297, "rewards/wordcountpos_reward/raw_geo/std": 0.1745157594502162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1069.25, "completions/mean_terminated_length": 1007.71435546875, "completions/min_length": 732.0, "completions/min_terminated_length": 732.0, "epoch": 0.8233646729345869, "frac_reward_zero_std": 0.0, "grad_norm": 3.296609985124161, "kl": 0.0171356201171875, "learning_rate": 1.830463212869615e-07, "loss": 0.0468, "num_tokens": 180395957.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9395303726196289, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08292703211118968, "rewards/wordcountpos_reward/raw_geo/std": 0.19026602579008822, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10292032157252812, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1191.6875, "completions/mean_terminated_length": 1171.1334228515625, "completions/min_length": 795.0, "completions/min_terminated_length": 795.0, "epoch": 0.8235647129425885, "frac_reward_zero_std": 0.0, "grad_norm": 3.2601820348559705, "kl": 0.01678466796875, "learning_rate": 1.828645274783323e-07, "loss": -0.0284, "num_tokens": 180435024.0, "reward": -4.470348358154297e-08, "reward_std": 0.9844897389411926, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.057155406734438245, "rewards/wordcountpos_reward/raw_geo/std": 0.08639932655997241, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1184.1875, "completions/mean_terminated_length": 1139.071533203125, "completions/min_length": 844.0, "completions/min_terminated_length": 844.0, "epoch": 0.8237647529505902, "frac_reward_zero_std": 0.0, "grad_norm": 3.159310837135786, "kl": 0.0186614990234375, "learning_rate": 1.826829126866267e-07, "loss": 0.0001, "num_tokens": 180478163.0, "reward": -7.450580596923828e-09, "reward_std": 0.9978146553039551, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.15378358394199854, "rewards/wordcountpos_reward/raw_geo/std": 0.1222614214738339, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1275.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 1039.6875, "completions/mean_terminated_length": 1039.6875, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.8239647929585917, "frac_reward_zero_std": 0.0, "grad_norm": 3.375681163061087, "kl": 0.018280029296875, "learning_rate": 1.8250147700040074e-07, "loss": -0.0294, "num_tokens": 180507062.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8583930730819702, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0752803547376027, "rewards/wordcountpos_reward/raw_geo/std": 0.14040901101459474, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027816, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 990.0625, "completions/mean_terminated_length": 990.0625, "completions/min_length": 704.0, "completions/min_terminated_length": 704.0, "epoch": 0.8241648329665933, "frac_reward_zero_std": 0.0, "grad_norm": 3.035143614081816, "kl": 0.0178985595703125, "learning_rate": 1.823202205081234e-07, "loss": -0.0385, "num_tokens": 180541231.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8760338425636292, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0768057697350888, "rewards/wordcountpos_reward/raw_geo/std": 0.06580147430302609, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1270.0, "completions/max_terminated_length": 1270.0, "completions/mean_length": 1114.125, "completions/mean_terminated_length": 1114.125, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.8243648729745949, "frac_reward_zero_std": 0.0, "grad_norm": 2.4929762020184394, "kl": 0.01544189453125, "learning_rate": 1.8213914329817611e-07, "loss": -0.0274, "num_tokens": 180581121.0, "reward": 0.0, "reward_std": 0.9551975727081299, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09167639399841208, "rewards/wordcountpos_reward/raw_geo/std": 0.07676473858124011, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1123.0, "completions/max_terminated_length": 1123.0, "completions/mean_length": 908.625, "completions/mean_terminated_length": 908.625, "completions/min_length": 707.0, "completions/min_terminated_length": 707.0, "epoch": 0.8245649129825965, "frac_reward_zero_std": 0.0, "grad_norm": 2.777537742670375, "kl": 0.013092041015625, "learning_rate": 1.8195824545885306e-07, "loss": -0.0246, "num_tokens": 180624443.0, "reward": 0.0, "reward_std": 0.8936692476272583, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.028490122002007786, "rewards/wordcountpos_reward/raw_geo/std": 0.13758478051574344, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1263.0625, "completions/mean_terminated_length": 1247.2667236328125, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.8247649529905982, "frac_reward_zero_std": 0.0, "grad_norm": 2.8841018152674716, "kl": 0.0176849365234375, "learning_rate": 1.817775270783607e-07, "loss": -0.022, "num_tokens": 180665324.0, "reward": 7.450580596923828e-09, "reward_std": 1.0113445520401, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0786832793375237, "rewards/wordcountpos_reward/raw_geo/std": 0.07634583111493433, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503961, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1118.8125, "completions/mean_terminated_length": 991.75, "completions/min_length": 665.0, "completions/min_terminated_length": 665.0, "epoch": 0.8249649929985997, "frac_reward_zero_std": 0.0, "grad_norm": 3.4793726946081205, "kl": 0.01678466796875, "learning_rate": 1.8159698824481816e-07, "loss": 0.0004, "num_tokens": 180706393.0, "reward": 0.0, "reward_std": 1.0525057315826416, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0592678710111904, "rewards/wordcountpos_reward/raw_geo/std": 0.09230041016181068, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1099.9375, "completions/mean_terminated_length": 1099.9375, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.8251650330066013, "frac_reward_zero_std": 0.0, "grad_norm": 3.360288432815373, "kl": 0.0180511474609375, "learning_rate": 1.8141662904625705e-07, "loss": -0.0369, "num_tokens": 180740032.0, "reward": 0.0, "reward_std": 0.6527832746505737, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06701966334375438, "rewards/wordcountpos_reward/raw_geo/std": 0.08685426505140353, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1215.5625, "completions/mean_terminated_length": 1196.60009765625, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.8253650730146029, "frac_reward_zero_std": 0.0, "grad_norm": 3.254563437890517, "kl": 0.020904541015625, "learning_rate": 1.8123644957062146e-07, "loss": -0.018, "num_tokens": 180781889.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0237940549850464, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03364031455779635, "rewards/wordcountpos_reward/raw_geo/std": 0.11385371921504633, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1092.375, "completions/mean_terminated_length": 1092.375, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.8255651130226045, "frac_reward_zero_std": 0.0, "grad_norm": 3.7203310066305684, "kl": 0.02191162109375, "learning_rate": 1.8105644990576742e-07, "loss": 0.0045, "num_tokens": 180832471.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9584295153617859, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.34476601130775036, "rewards/wordcountpos_reward/raw_geo/std": 0.12103525088979734, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784837, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1057.0, "completions/max_terminated_length": 1057.0, "completions/mean_length": 887.375, "completions/mean_terminated_length": 887.375, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.8257651530306062, "frac_reward_zero_std": 0.0, "grad_norm": 3.1678347441547583, "kl": 0.0157012939453125, "learning_rate": 1.8087663013946376e-07, "loss": -0.0263, "num_tokens": 180868093.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7168399095535278, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08993645575818554, "rewards/wordcountpos_reward/raw_geo/std": 0.0925750524838751, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.16865480854231357, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1106.5, "completions/mean_terminated_length": 1080.2667236328125, "completions/min_length": 855.0, "completions/min_terminated_length": 855.0, "epoch": 0.8259651930386077, "frac_reward_zero_std": 0.0, "grad_norm": 3.300257453431251, "kl": 0.019866943359375, "learning_rate": 1.8069699035939138e-07, "loss": -0.0587, "num_tokens": 180900157.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9906886219978333, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009956228804884316, "rewards/wordcountpos_reward/raw_geo/std": 0.039360572986932976, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1160.9375, "completions/mean_terminated_length": 1006.8182373046875, "completions/min_length": 608.0, "completions/min_terminated_length": 608.0, "epoch": 0.8261652330466093, "frac_reward_zero_std": 0.0, "grad_norm": 3.5578628370444285, "kl": 0.016754150390625, "learning_rate": 1.8051753065314363e-07, "loss": 0.0731, "num_tokens": 180954092.0, "reward": 0.0, "reward_std": 0.9682924747467041, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.44458564803327727, "rewards/wordcountpos_reward/raw_geo/std": 0.2512658692930529, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466157, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1271.9375, "completions/mean_terminated_length": 1219.3077392578125, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.8263652730546109, "frac_reward_zero_std": 0.0, "grad_norm": 2.99492689769976, "kl": 0.014617919921875, "learning_rate": 1.8033825110822542e-07, "loss": 0.0131, "num_tokens": 181001123.0, "reward": 0.0, "reward_std": 1.0227949619293213, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1331086155471137, "rewards/wordcountpos_reward/raw_geo/std": 0.09170558369593368, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1167.0, "completions/max_terminated_length": 1167.0, "completions/mean_length": 1010.875, "completions/mean_terminated_length": 1010.875, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.8265653130626125, "frac_reward_zero_std": 0.0, "grad_norm": 2.311447948069117, "kl": 0.011138916015625, "learning_rate": 1.8015915181205486e-07, "loss": -0.0284, "num_tokens": 181038689.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0008397102355957, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.021558725221923467, "rewards/wordcountpos_reward/raw_geo/std": 0.15330334859770728, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1268.5, "completions/mean_terminated_length": 1215.0770263671875, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.8267653530706142, "frac_reward_zero_std": 0.0, "grad_norm": 3.12816302194529, "kl": 0.017333984375, "learning_rate": 1.799802328519613e-07, "loss": -0.0082, "num_tokens": 181085145.0, "reward": 3.725290298461914e-08, "reward_std": 1.007598876953125, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1885172166388121, "rewards/wordcountpos_reward/raw_geo/std": 0.1807204567616063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1086.875, "completions/mean_terminated_length": 1086.875, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.8269653930786157, "frac_reward_zero_std": 0.0, "grad_norm": 3.129813998141374, "kl": 0.0160675048828125, "learning_rate": 1.7980149431518615e-07, "loss": -0.019, "num_tokens": 181133823.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8938741683959961, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07865603023127463, "rewards/wordcountpos_reward/raw_geo/std": 0.0968569606041315, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1016.0625, "completions/mean_terminated_length": 1016.0625, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.8271654330866173, "frac_reward_zero_std": 0.0, "grad_norm": 2.740073632220367, "kl": 0.0145111083984375, "learning_rate": 1.7962293628888363e-07, "loss": -0.0523, "num_tokens": 181184280.0, "reward": 3.725290298461914e-09, "reward_std": 1.0180604457855225, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.10322350662437317, "rewards/wordcountpos_reward/raw_geo/std": 0.10806989954784503, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11925695879998881, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1258.75, "completions/mean_terminated_length": 1114.0, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.8273654730946189, "frac_reward_zero_std": 0.0, "grad_norm": 3.284446829891586, "kl": 0.01904296875, "learning_rate": 1.7944455886011917e-07, "loss": 0.0103, "num_tokens": 181238244.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9773375391960144, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.008998556484627392, "rewards/wordcountpos_reward/raw_geo/std": 0.12274691364804968, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1121.6875, "completions/mean_terminated_length": 1096.4666748046875, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.8275655131026205, "frac_reward_zero_std": 0.0, "grad_norm": 3.8824472405117776, "kl": 0.02130126953125, "learning_rate": 1.7926636211587054e-07, "loss": -0.0296, "num_tokens": 181289015.0, "reward": -2.9802322387695312e-08, "reward_std": 0.785582423210144, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07888183474236383, "rewards/wordcountpos_reward/raw_geo/std": 0.060172909750747156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1140987226857449, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 1067.9375, "completions/mean_terminated_length": 1067.9375, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.8277655531106222, "frac_reward_zero_std": 0.0, "grad_norm": 3.2623702232096212, "kl": 0.016998291015625, "learning_rate": 1.7908834614302697e-07, "loss": 0.0405, "num_tokens": 181336374.0, "reward": 0.0, "reward_std": 0.7767361998558044, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09134488770657115, "rewards/wordcountpos_reward/raw_geo/std": 0.13134867513350348, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1231.3125, "completions/mean_terminated_length": 1213.4000244140625, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.8279655931186237, "frac_reward_zero_std": 0.0, "grad_norm": 3.1852571741971727, "kl": 0.019012451171875, "learning_rate": 1.7891051102839033e-07, "loss": 0.0463, "num_tokens": 181376859.0, "reward": 0.0, "reward_std": 0.6603677272796631, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03745278058354791, "rewards/wordcountpos_reward/raw_geo/std": 0.06234480875954559, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1137.5, "completions/mean_terminated_length": 1137.5, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.8281656331266253, "frac_reward_zero_std": 0.0, "grad_norm": 2.8729371854284977, "kl": 0.01641845703125, "learning_rate": 1.7873285685867345e-07, "loss": 0.0133, "num_tokens": 181409707.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9450466632843018, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019448174820408982, "rewards/wordcountpos_reward/raw_geo/std": 0.07656420817720128, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1169.0, "completions/max_terminated_length": 1169.0, "completions/mean_length": 890.5625, "completions/mean_terminated_length": 890.5625, "completions/min_length": 538.0, "completions/min_terminated_length": 538.0, "epoch": 0.828365673134627, "frac_reward_zero_std": 0.0, "grad_norm": 3.716279351815086, "kl": 0.01873779296875, "learning_rate": 1.7855538372050148e-07, "loss": -0.0074, "num_tokens": 181450460.0, "reward": 0.0, "reward_std": 1.0207929611206055, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08409406977182853, "rewards/wordcountpos_reward/raw_geo/std": 0.04555585566422374, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1143.4375, "completions/mean_terminated_length": 1143.4375, "completions/min_length": 878.0, "completions/min_terminated_length": 878.0, "epoch": 0.8285657131426285, "frac_reward_zero_std": 0.0, "grad_norm": 2.75250378907843, "kl": 0.0174102783203125, "learning_rate": 1.7837809170041112e-07, "loss": -0.0359, "num_tokens": 181499971.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8714861869812012, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05582438880739462, "rewards/wordcountpos_reward/raw_geo/std": 0.13355753077495472, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10390522747338701, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1158.875, "completions/mean_terminated_length": 1136.1334228515625, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.8287657531506302, "frac_reward_zero_std": 0.0, "grad_norm": 2.706067601346658, "kl": 0.0144195556640625, "learning_rate": 1.782009808848509e-07, "loss": -0.0205, "num_tokens": 181542641.0, "reward": 0.0, "reward_std": 0.8842585682868958, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13006167828644913, "rewards/wordcountpos_reward/raw_geo/std": 0.0781554178997406, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1204.1875, "completions/mean_terminated_length": 974.1111450195312, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.8289657931586317, "frac_reward_zero_std": 0.0, "grad_norm": 2.4312409067667438, "kl": 0.0134735107421875, "learning_rate": 1.7802405136018052e-07, "loss": 0.0429, "num_tokens": 181594668.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8138139247894287, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.000812064323744241, "rewards/wordcountpos_reward/raw_geo/std": 0.2892492090930117, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1314.3125, "completions/mean_terminated_length": 1287.7857666015625, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.8291658331666333, "frac_reward_zero_std": 0.0, "grad_norm": 3.129756529415087, "kl": 0.01983642578125, "learning_rate": 1.7784730321267182e-07, "loss": -0.0037, "num_tokens": 181643449.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5414689183235168, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.26327195294029676, "rewards/wordcountpos_reward/raw_geo/std": 0.3368964980380672, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1352.9375, "completions/mean_terminated_length": 1319.0, "completions/min_length": 1065.0, "completions/min_terminated_length": 1065.0, "epoch": 0.829365873174635, "frac_reward_zero_std": 0.0, "grad_norm": 2.5536052266713276, "kl": 0.011932373046875, "learning_rate": 1.77670736528508e-07, "loss": -0.0117, "num_tokens": 181688968.0, "reward": 0.0, "reward_std": 0.6399260759353638, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11599772356528232, "rewards/wordcountpos_reward/raw_geo/std": 0.0611261894112512, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1369.3125, "completions/mean_terminated_length": 1325.75, "completions/min_length": 1161.0, "completions/min_terminated_length": 1161.0, "epoch": 0.8295659131826365, "frac_reward_zero_std": 0.0, "grad_norm": 3.1964168639493717, "kl": 0.0156097412109375, "learning_rate": 1.7749435139378386e-07, "loss": -0.0053, "num_tokens": 181743533.0, "reward": 1.1175870895385742e-08, "reward_std": 1.052491307258606, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08386290293076819, "rewards/wordcountpos_reward/raw_geo/std": 0.03742300668399057, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1171.3125, "completions/mean_terminated_length": 1171.3125, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.8297659531906382, "frac_reward_zero_std": 0.0, "grad_norm": 2.7247270988267296, "kl": 0.0154266357421875, "learning_rate": 1.773181478945054e-07, "loss": 0.0268, "num_tokens": 181787546.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7250285744667053, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08553324047718586, "rewards/wordcountpos_reward/raw_geo/std": 0.2784203872738641, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1165.3125, "completions/mean_terminated_length": 1143.0, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.8299659931986397, "frac_reward_zero_std": 0.0, "grad_norm": 3.1758192155748888, "kl": 0.017059326171875, "learning_rate": 1.771421261165903e-07, "loss": -0.023, "num_tokens": 181820655.0, "reward": 0.0, "reward_std": 0.6844391822814941, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06391222104636066, "rewards/wordcountpos_reward/raw_geo/std": 0.14723130426287065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1405.5625, "completions/mean_terminated_length": 1332.111083984375, "completions/min_length": 1240.0, "completions/min_terminated_length": 1240.0, "epoch": 0.8301660332066413, "frac_reward_zero_std": 0.0, "grad_norm": 2.9717257609042385, "kl": 0.0145111083984375, "learning_rate": 1.7696628614586768e-07, "loss": -0.0056, "num_tokens": 181879488.0, "reward": 0.0, "reward_std": 0.7087894678115845, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1696236110433006, "rewards/wordcountpos_reward/raw_geo/std": 0.10458547138938716, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1549193338482967, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1130.125, "completions/mean_terminated_length": 1130.125, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.830366073214643, "frac_reward_zero_std": 0.0, "grad_norm": 2.8266239865364664, "kl": 0.0123443603515625, "learning_rate": 1.7679062806807793e-07, "loss": -0.0083, "num_tokens": 181921970.0, "reward": 0.0, "reward_std": 0.8170619010925293, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06150253714845234, "rewards/wordcountpos_reward/raw_geo/std": 0.05793031204635703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921945, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1365.3125, "completions/mean_terminated_length": 1260.5555419921875, "completions/min_length": 1189.0, "completions/min_terminated_length": 1189.0, "epoch": 0.8305661132226445, "frac_reward_zero_std": 0.0, "grad_norm": 2.262799294387137, "kl": 0.00745391845703125, "learning_rate": 1.766151519688725e-07, "loss": -0.0188, "num_tokens": 181972815.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0052040815353394, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.37767739689821184, "rewards/wordcountpos_reward/raw_geo/std": 0.3034141156712894, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1302.0625, "completions/mean_terminated_length": 1273.7857666015625, "completions/min_length": 1014.0, "completions/min_terminated_length": 1014.0, "epoch": 0.8307661532306462, "frac_reward_zero_std": 0.0, "grad_norm": 3.0824450458919452, "kl": 0.014404296875, "learning_rate": 1.7643985793381443e-07, "loss": -0.0016, "num_tokens": 182021216.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7305567264556885, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04437369368321931, "rewards/wordcountpos_reward/raw_geo/std": 0.06738504849937106, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1164.0, "completions/max_terminated_length": 1164.0, "completions/mean_length": 932.6875, "completions/mean_terminated_length": 932.6875, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.8309661932386477, "frac_reward_zero_std": 0.0, "grad_norm": 3.8361498925641517, "kl": 0.024505615234375, "learning_rate": 1.7626474604837805e-07, "loss": -0.0219, "num_tokens": 182060411.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9414282441139221, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.31237027545647467, "rewards/wordcountpos_reward/raw_geo/std": 0.18032985456694267, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1378.875, "completions/mean_terminated_length": 1306.2000732421875, "completions/min_length": 1140.0, "completions/min_terminated_length": 1140.0, "epoch": 0.8311662332466493, "frac_reward_zero_std": 0.0, "grad_norm": 2.993744139967783, "kl": 0.0178985595703125, "learning_rate": 1.7608981639794868e-07, "loss": -0.003, "num_tokens": 182111385.0, "reward": 0.0, "reward_std": 0.7185910940170288, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2151154987807239, "rewards/wordcountpos_reward/raw_geo/std": 0.18157356134930067, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1186.8125, "completions/mean_terminated_length": 1165.933349609375, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.831366273254651, "frac_reward_zero_std": 0.0, "grad_norm": 3.2210443202948804, "kl": 0.0170745849609375, "learning_rate": 1.7591506906782244e-07, "loss": 0.0264, "num_tokens": 182163166.0, "reward": 0.0, "reward_std": 0.8466078042984009, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10335248943356308, "rewards/wordcountpos_reward/raw_geo/std": 0.12538959630475985, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1140.0, "completions/mean_length": 1139.125, "completions/mean_terminated_length": 975.0909423828125, "completions/min_length": 558.0, "completions/min_terminated_length": 558.0, "epoch": 0.8315663132626525, "frac_reward_zero_std": 0.0, "grad_norm": 2.870582153725264, "kl": 0.013671875, "learning_rate": 1.7574050414320745e-07, "loss": -0.0472, "num_tokens": 182209680.0, "reward": -2.9802322387695312e-08, "reward_std": 0.968745231628418, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1423812332172947, "rewards/wordcountpos_reward/raw_geo/std": 0.10876144651337914, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1118.1875, "completions/mean_terminated_length": 1092.7333984375, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.8317663532706542, "frac_reward_zero_std": 0.0, "grad_norm": 3.400597656946256, "kl": 0.0149688720703125, "learning_rate": 1.7556612170922202e-07, "loss": 0.0098, "num_tokens": 182253699.0, "reward": 0.0, "reward_std": 0.6601122617721558, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10674014111239487, "rewards/wordcountpos_reward/raw_geo/std": 0.21725961626268978, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1261979632400061, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 967.8125, "completions/mean_terminated_length": 967.8125, "completions/min_length": 749.0, "completions/min_terminated_length": 749.0, "epoch": 0.8319663932786557, "frac_reward_zero_std": 0.0, "grad_norm": 2.5818981372345644, "kl": 0.01142120361328125, "learning_rate": 1.7539192185089602e-07, "loss": -0.0289, "num_tokens": 182288384.0, "reward": 0.0, "reward_std": 0.5984501838684082, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0075209234455286145, "rewards/wordcountpos_reward/raw_geo/std": 0.04029581075263994, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1103.8125, "completions/mean_terminated_length": 1103.8125, "completions/min_length": 887.0, "completions/min_terminated_length": 887.0, "epoch": 0.8321664332866573, "frac_reward_zero_std": 0.0, "grad_norm": 2.4622491817478713, "kl": 0.0116119384765625, "learning_rate": 1.7521790465316976e-07, "loss": -0.0075, "num_tokens": 182328789.0, "reward": -4.470348358154297e-08, "reward_std": 0.898263156414032, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.017883186097614776, "rewards/wordcountpos_reward/raw_geo/std": 0.12720516445269558, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1330.125, "completions/mean_terminated_length": 1228.2000732421875, "completions/min_length": 1005.0, "completions/min_terminated_length": 1005.0, "epoch": 0.832366473294659, "frac_reward_zero_std": 0.0, "grad_norm": 3.0030525178147855, "kl": 0.015533447265625, "learning_rate": 1.7504407020089528e-07, "loss": -0.0282, "num_tokens": 182385287.0, "reward": 2.9802322387695312e-08, "reward_std": 0.636249840259552, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14571595323930703, "rewards/wordcountpos_reward/raw_geo/std": 0.319335043725322, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1369.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 967.25, "completions/mean_terminated_length": 967.25, "completions/min_length": 649.0, "completions/min_terminated_length": 649.0, "epoch": 0.8325665133026605, "frac_reward_zero_std": 0.0, "grad_norm": 3.360689842990387, "kl": 0.014495849609375, "learning_rate": 1.7487041857883467e-07, "loss": 0.0034, "num_tokens": 182414691.0, "reward": 0.0, "reward_std": 0.8017953634262085, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0010234212101500849, "rewards/wordcountpos_reward/raw_geo/std": 0.11367574866972995, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1267.0, "completions/mean_terminated_length": 1233.71435546875, "completions/min_length": 1048.0, "completions/min_terminated_length": 1048.0, "epoch": 0.8327665533106622, "frac_reward_zero_std": 0.0, "grad_norm": 2.2224994619878498, "kl": 0.010601043701171875, "learning_rate": 1.7469694987166143e-07, "loss": 0.0096, "num_tokens": 182458971.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7255486249923706, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18776024814601033, "rewards/wordcountpos_reward/raw_geo/std": 0.2246182868716821, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1103.0, "completions/mean_terminated_length": 1103.0, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.8329665933186637, "frac_reward_zero_std": 0.0, "grad_norm": 2.844507334937442, "kl": 0.01324462890625, "learning_rate": 1.745236641639596e-07, "loss": -0.0301, "num_tokens": 182497131.0, "reward": -2.9802322387695312e-08, "reward_std": 0.39844000339508057, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09777539444035169, "rewards/wordcountpos_reward/raw_geo/std": 0.13095211704474397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1143.4375, "completions/mean_terminated_length": 1119.666748046875, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.8331666333266653, "frac_reward_zero_std": 0.0, "grad_norm": 3.4820754965455114, "kl": 0.016845703125, "learning_rate": 1.7435056154022427e-07, "loss": 0.0077, "num_tokens": 182538802.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6064894199371338, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11267790620682397, "rewards/wordcountpos_reward/raw_geo/std": 0.20407390440066528, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1233.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 999.5, "completions/mean_terminated_length": 999.5, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.833366673334667, "frac_reward_zero_std": 0.0, "grad_norm": 3.735508051475552, "kl": 0.024078369140625, "learning_rate": 1.7417764208486077e-07, "loss": 0.0276, "num_tokens": 182570834.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7981045246124268, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.036880262938685536, "rewards/wordcountpos_reward/raw_geo/std": 0.07938601895182887, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 977.8125, "completions/mean_terminated_length": 977.8125, "completions/min_length": 686.0, "completions/min_terminated_length": 686.0, "epoch": 0.8335667133426685, "frac_reward_zero_std": 0.0, "grad_norm": 2.7558847362579164, "kl": 0.0092010498046875, "learning_rate": 1.7400490588218564e-07, "loss": -0.0286, "num_tokens": 182615159.0, "reward": -5.960464477539063e-08, "reward_std": 0.7644119262695312, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21567995955566202, "rewards/wordcountpos_reward/raw_geo/std": 0.2245868949716291, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454343, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 949.0, "completions/mean_length": 1173.125, "completions/mean_terminated_length": 846.25, "completions/min_length": 595.0, "completions/min_terminated_length": 595.0, "epoch": 0.8337667533506702, "frac_reward_zero_std": 0.0, "grad_norm": 3.0718235253374955, "kl": 0.0165557861328125, "learning_rate": 1.738323530164257e-07, "loss": -0.0467, "num_tokens": 182667745.0, "reward": 0.0, "reward_std": 0.9998313784599304, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15048978618520872, "rewards/wordcountpos_reward/raw_geo/std": 0.19057323940792456, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.16487930490266262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1166.75, "completions/mean_terminated_length": 1119.1429443359375, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.8339667933586717, "frac_reward_zero_std": 0.0, "grad_norm": 3.282636704147569, "kl": 0.018035888671875, "learning_rate": 1.7365998357171868e-07, "loss": -0.0504, "num_tokens": 182714085.0, "reward": 4.470348358154297e-08, "reward_std": 1.0216281414031982, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04841072810729229, "rewards/wordcountpos_reward/raw_geo/std": 0.06928102667871522, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1146.875, "completions/mean_terminated_length": 1146.875, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.8341668333666733, "frac_reward_zero_std": 0.0, "grad_norm": 3.4247222980370786, "kl": 0.015350341796875, "learning_rate": 1.7348779763211247e-07, "loss": -0.04, "num_tokens": 182762107.0, "reward": 0.0, "reward_std": 0.6106992959976196, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2946749709818212, "rewards/wordcountpos_reward/raw_geo/std": 0.09226546452941133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215289, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1035.5, "completions/mean_terminated_length": 1035.5, "completions/min_length": 762.0, "completions/min_terminated_length": 762.0, "epoch": 0.834366873374675, "frac_reward_zero_std": 0.0, "grad_norm": 3.3502848532711234, "kl": 0.016632080078125, "learning_rate": 1.7331579528156586e-07, "loss": -0.0123, "num_tokens": 182792491.0, "reward": 0.0, "reward_std": 0.6563529372215271, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10177827531880684, "rewards/wordcountpos_reward/raw_geo/std": 0.12554128584700439, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1255.25, "completions/mean_terminated_length": 1255.25, "completions/min_length": 1165.0, "completions/min_terminated_length": 1165.0, "epoch": 0.8345669133826765, "frac_reward_zero_std": 0.0, "grad_norm": 2.0527484012631105, "kl": 0.0090484619140625, "learning_rate": 1.7314397660394796e-07, "loss": -0.0298, "num_tokens": 182837775.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7454309463500977, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06509232075889712, "rewards/wordcountpos_reward/raw_geo/std": 0.034052190726374686, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1223.3125, "completions/mean_terminated_length": 1159.4615478515625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.8347669533906782, "frac_reward_zero_std": 0.0, "grad_norm": 3.324388600540006, "kl": 0.0175933837890625, "learning_rate": 1.729723416830385e-07, "loss": 0.0133, "num_tokens": 182886996.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9405474662780762, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12423303890629331, "rewards/wordcountpos_reward/raw_geo/std": 0.20470764692949858, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027817, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1112.1875, "completions/mean_terminated_length": 1112.1875, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.8349669933986797, "frac_reward_zero_std": 0.0, "grad_norm": 3.0153326138391394, "kl": 0.01544189453125, "learning_rate": 1.7280089060252708e-07, "loss": 0.0267, "num_tokens": 182926447.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9110355377197266, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.041105638755005454, "rewards/wordcountpos_reward/raw_geo/std": 0.2541423784609895, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1206.25, "completions/mean_terminated_length": 1206.25, "completions/min_length": 1101.0, "completions/min_terminated_length": 1101.0, "epoch": 0.8351670334066813, "frac_reward_zero_std": 0.0, "grad_norm": 3.012768960027429, "kl": 0.0140380859375, "learning_rate": 1.726296234460143e-07, "loss": 0.0053, "num_tokens": 182969507.0, "reward": -5.960464477539063e-08, "reward_std": 0.8620928525924683, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013380015346395728, "rewards/wordcountpos_reward/raw_geo/std": 0.0940311565504974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116196, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1158.0, "completions/max_terminated_length": 1158.0, "completions/mean_length": 932.75, "completions/mean_terminated_length": 932.75, "completions/min_length": 660.0, "completions/min_terminated_length": 660.0, "epoch": 0.835367073414683, "frac_reward_zero_std": 0.0, "grad_norm": 3.8194167628124633, "kl": 0.0191650390625, "learning_rate": 1.7245854029701067e-07, "loss": 0.0069, "num_tokens": 183011783.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9918914437294006, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07086100099547067, "rewards/wordcountpos_reward/raw_geo/std": 0.10667248685296363, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1464.1875, "completions/mean_terminated_length": 1404.5, "completions/min_length": 1205.0, "completions/min_terminated_length": 1205.0, "epoch": 0.8355671134226845, "frac_reward_zero_std": 0.0, "grad_norm": 2.7765647070736215, "kl": 0.0164947509765625, "learning_rate": 1.7228764123893736e-07, "loss": 0.0041, "num_tokens": 183059858.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7378441095352173, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23183799623401452, "rewards/wordcountpos_reward/raw_geo/std": 0.25874317006128256, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1242.4375, "completions/mean_terminated_length": 1225.2667236328125, "completions/min_length": 1058.0, "completions/min_terminated_length": 1058.0, "epoch": 0.8357671534306861, "frac_reward_zero_std": 0.0, "grad_norm": 3.3431788353769254, "kl": 0.019927978515625, "learning_rate": 1.7211692635512502e-07, "loss": 0.0003, "num_tokens": 183112345.0, "reward": 0.0, "reward_std": 1.0532344579696655, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.005063283080337688, "rewards/wordcountpos_reward/raw_geo/std": 0.24770919069706437, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1224.8125, "completions/mean_terminated_length": 1185.5, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.8359671934386878, "frac_reward_zero_std": 0.0, "grad_norm": 2.5765976994646436, "kl": 0.0116119384765625, "learning_rate": 1.7194639572881558e-07, "loss": -0.0141, "num_tokens": 183158838.0, "reward": 0.0, "reward_std": 0.6134829521179199, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12960177759292407, "rewards/wordcountpos_reward/raw_geo/std": 0.2790059134228377, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1268.875, "completions/mean_terminated_length": 1235.857177734375, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.8361672334466893, "frac_reward_zero_std": 0.0, "grad_norm": 2.9112470430487978, "kl": 0.0168609619140625, "learning_rate": 1.717760494431602e-07, "loss": 0.023, "num_tokens": 183204996.0, "reward": 0.0, "reward_std": 0.9612973928451538, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.01848440769933141, "rewards/wordcountpos_reward/raw_geo/std": 0.07802651644199639, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 996.6875, "completions/mean_terminated_length": 996.6875, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.836367273454691, "frac_reward_zero_std": 0.0, "grad_norm": 3.20365805783637, "kl": 0.0203857421875, "learning_rate": 1.716058875812207e-07, "loss": -0.0504, "num_tokens": 183245959.0, "reward": 2.9802322387695312e-08, "reward_std": 0.44188615679740906, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02931424310631804, "rewards/wordcountpos_reward/raw_geo/std": 0.032137707219950275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1161.6875, "completions/mean_terminated_length": 1139.1334228515625, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.8365673134626925, "frac_reward_zero_std": 0.0, "grad_norm": 3.5426765479533557, "kl": 0.018829345703125, "learning_rate": 1.7143591022596842e-07, "loss": -0.0304, "num_tokens": 183294090.0, "reward": 0.0, "reward_std": 0.8757162690162659, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09077241819960272, "rewards/wordcountpos_reward/raw_geo/std": 0.14627633953058874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12988598989256067, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1096.3125, "completions/mean_terminated_length": 1038.6429443359375, "completions/min_length": 706.0, "completions/min_terminated_length": 706.0, "epoch": 0.8367673534706941, "frac_reward_zero_std": 0.0, "grad_norm": 2.940812249180084, "kl": 0.0151214599609375, "learning_rate": 1.7126611746028556e-07, "loss": -0.0229, "num_tokens": 183338319.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0487014055252075, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.054451621739289656, "rewards/wordcountpos_reward/raw_geo/std": 0.07204983788320186, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1226.625, "completions/mean_terminated_length": 1208.4000244140625, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.8369673934786958, "frac_reward_zero_std": 0.0, "grad_norm": 2.9994070716770707, "kl": 0.0175933837890625, "learning_rate": 1.7109650936696353e-07, "loss": -0.0304, "num_tokens": 183389729.0, "reward": 0.0, "reward_std": 0.9412825107574463, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15994896871082445, "rewards/wordcountpos_reward/raw_geo/std": 0.05189306730622507, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1075.6875, "completions/mean_terminated_length": 1075.6875, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.8371674334866973, "frac_reward_zero_std": 0.0, "grad_norm": 3.6754263656913078, "kl": 0.017730712890625, "learning_rate": 1.709270860287041e-07, "loss": -0.021, "num_tokens": 183422044.0, "reward": 0.0, "reward_std": 1.0268912315368652, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07734371728499634, "rewards/wordcountpos_reward/raw_geo/std": 0.063604357001939, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 976.875, "completions/mean_terminated_length": 976.875, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.837367473494699, "frac_reward_zero_std": 0.0, "grad_norm": 3.658260008530839, "kl": 0.020294189453125, "learning_rate": 1.7075784752811885e-07, "loss": -0.0059, "num_tokens": 183471146.0, "reward": 2.60770320892334e-08, "reward_std": 1.0583555698394775, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09347812361589415, "rewards/wordcountpos_reward/raw_geo/std": 0.07999094102321665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1259.875, "completions/mean_terminated_length": 1225.571533203125, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.8375675135027005, "frac_reward_zero_std": 0.0, "grad_norm": 2.741321478786601, "kl": 0.0144500732421875, "learning_rate": 1.7058879394772932e-07, "loss": -0.0032, "num_tokens": 183515376.0, "reward": 0.0, "reward_std": 0.6441935300827026, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0577078391088571, "rewards/wordcountpos_reward/raw_geo/std": 0.057541437507129416, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575906, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1252.1875, "completions/mean_terminated_length": 1195.0, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.8377675535107021, "frac_reward_zero_std": 0.0, "grad_norm": 2.389045123235856, "kl": 0.018035888671875, "learning_rate": 1.7041992536996674e-07, "loss": -0.1661, "num_tokens": 183567035.0, "reward": -1.4901161193847656e-08, "reward_std": 1.052034616470337, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1399960046571573, "rewards/wordcountpos_reward/raw_geo/std": 0.06624447304612016, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.18519259244445035, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1031.3125, "completions/mean_terminated_length": 1000.0667114257812, "completions/min_length": 608.0, "completions/min_terminated_length": 608.0, "epoch": 0.8379675935187038, "frac_reward_zero_std": 0.0, "grad_norm": 2.889489755691195, "kl": 0.01654052734375, "learning_rate": 1.7025124187717205e-07, "loss": 0.0158, "num_tokens": 183608456.0, "reward": 0.0, "reward_std": 0.7110668420791626, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.051660398631155194, "rewards/wordcountpos_reward/raw_geo/std": 0.09786189008948892, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1140.0625, "completions/mean_terminated_length": 1088.6429443359375, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.8381676335267053, "frac_reward_zero_std": 0.0, "grad_norm": 3.0230338564115162, "kl": 0.01678466796875, "learning_rate": 1.7008274355159635e-07, "loss": -0.0169, "num_tokens": 183653617.0, "reward": 0.0, "reward_std": 0.6574379205703735, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05366441551112263, "rewards/wordcountpos_reward/raw_geo/std": 0.18294839962394904, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1043.4375, "completions/mean_terminated_length": 938.0769653320312, "completions/min_length": 637.0, "completions/min_terminated_length": 637.0, "epoch": 0.838367673534707, "frac_reward_zero_std": 0.0, "grad_norm": 3.6242314719728665, "kl": 0.0189208984375, "learning_rate": 1.6991443047539996e-07, "loss": -0.1144, "num_tokens": 183706184.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8772252202033997, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0066949300474317755, "rewards/wordcountpos_reward/raw_geo/std": 0.0728810957086676, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1377060745318193, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1211.0, "completions/max_terminated_length": 1211.0, "completions/mean_length": 1040.3125, "completions/mean_terminated_length": 1040.3125, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.8385677135427085, "frac_reward_zero_std": 0.0, "grad_norm": 2.3160688120754167, "kl": 0.0129547119140625, "learning_rate": 1.6974630273065321e-07, "loss": 0.0257, "num_tokens": 183749997.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6908342242240906, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014571072521745922, "rewards/wordcountpos_reward/raw_geo/std": 0.19855242300784873, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 1052.9375, "completions/mean_terminated_length": 1052.9375, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.8387677535507101, "frac_reward_zero_std": 0.0, "grad_norm": 3.1633718032572817, "kl": 0.013916015625, "learning_rate": 1.69578360399336e-07, "loss": -0.0354, "num_tokens": 183793780.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9785726070404053, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0938883682081482, "rewards/wordcountpos_reward/raw_geo/std": 0.17406941445201102, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382576, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1310.125, "completions/mean_terminated_length": 1162.4444580078125, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.8389677935587118, "frac_reward_zero_std": 0.0, "grad_norm": 2.802309411850493, "kl": 0.0149383544921875, "learning_rate": 1.6941060356333781e-07, "loss": 0.0031, "num_tokens": 183846998.0, "reward": -1.862645149230957e-08, "reward_std": 1.0128107070922852, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03027788090338776, "rewards/wordcountpos_reward/raw_geo/std": 0.12230315720937145, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1281.75, "completions/mean_terminated_length": 1281.75, "completions/min_length": 1117.0, "completions/min_terminated_length": 1117.0, "epoch": 0.8391678335667133, "frac_reward_zero_std": 0.0, "grad_norm": 2.9539514103966247, "kl": 0.0152587890625, "learning_rate": 1.692430323044574e-07, "loss": 0.022, "num_tokens": 183888250.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9207077622413635, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.031766574801991146, "rewards/wordcountpos_reward/raw_geo/std": 0.09396176724619607, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1370.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1149.625, "completions/mean_terminated_length": 1149.625, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.839367873574715, "frac_reward_zero_std": 0.0, "grad_norm": 3.596171328972804, "kl": 0.02032470703125, "learning_rate": 1.690756467044034e-07, "loss": 0.0439, "num_tokens": 183932940.0, "reward": 0.0, "reward_std": 0.8140091896057129, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.28674431887428165, "rewards/wordcountpos_reward/raw_geo/std": 0.2550875573674385, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 1061.0, "completions/mean_terminated_length": 1061.0, "completions/min_length": 818.0, "completions/min_terminated_length": 818.0, "epoch": 0.8395679135827165, "frac_reward_zero_std": 0.0, "grad_norm": 3.2463721625598057, "kl": 0.01361083984375, "learning_rate": 1.6890844684479386e-07, "loss": 0.0196, "num_tokens": 183964196.0, "reward": -3.725290298461914e-08, "reward_std": 0.9887361526489258, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.055228775229892876, "rewards/wordcountpos_reward/raw_geo/std": 0.07464017306181162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 1005.25, "completions/mean_terminated_length": 1005.25, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.8397679535907181, "frac_reward_zero_std": 0.0, "grad_norm": 3.7323045500262055, "kl": 0.0196533203125, "learning_rate": 1.687414328071562e-07, "loss": -0.0005, "num_tokens": 183999656.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9975693821907043, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.23055948616270083, "rewards/wordcountpos_reward/raw_geo/std": 0.23858558336328053, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1163.4375, "completions/mean_terminated_length": 1163.4375, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.8399679935987198, "frac_reward_zero_std": 0.0, "grad_norm": 2.6377315335757126, "kl": 0.0143890380859375, "learning_rate": 1.685746046729271e-07, "loss": -0.038, "num_tokens": 184035687.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8997478485107422, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08955687269708615, "rewards/wordcountpos_reward/raw_geo/std": 0.1647875552330036, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1377.0, "completions/mean_length": 1114.6875, "completions/mean_terminated_length": 1089.0, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.8401680336067213, "frac_reward_zero_std": 0.0, "grad_norm": 2.877437028691097, "kl": 0.0146026611328125, "learning_rate": 1.6840796252345273e-07, "loss": 0.0358, "num_tokens": 184082282.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0240176916122437, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08057870621974045, "rewards/wordcountpos_reward/raw_geo/std": 0.10209458030121253, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1219.1875, "completions/mean_terminated_length": 1154.3846435546875, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.840368073614723, "frac_reward_zero_std": 0.0, "grad_norm": 2.940353064350226, "kl": 0.015777587890625, "learning_rate": 1.6824150643998866e-07, "loss": -0.0847, "num_tokens": 184135053.0, "reward": 0.0, "reward_std": 1.0008465051651, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.28644556792680176, "rewards/wordcountpos_reward/raw_geo/std": 0.16166994410359004, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1235.75, "completions/mean_terminated_length": 1198.0, "completions/min_length": 939.0, "completions/min_terminated_length": 939.0, "epoch": 0.8405681136227245, "frac_reward_zero_std": 0.0, "grad_norm": 2.6202734441308415, "kl": 0.016326904296875, "learning_rate": 1.6807523650369964e-07, "loss": -0.0333, "num_tokens": 184171385.0, "reward": 0.0, "reward_std": 0.613052487373352, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.010331911979394079, "rewards/wordcountpos_reward/raw_geo/std": 0.06426193714548341, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1272.25, "completions/mean_terminated_length": 1239.71435546875, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.8407681536307261, "frac_reward_zero_std": 0.0, "grad_norm": 3.189823513098897, "kl": 0.0154571533203125, "learning_rate": 1.6790915279565955e-07, "loss": -0.0092, "num_tokens": 184222461.0, "reward": 0.0, "reward_std": 0.378025084733963, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13106513585152696, "rewards/wordcountpos_reward/raw_geo/std": 0.21636622877668943, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1271.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1033.9375, "completions/mean_terminated_length": 1033.9375, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.8409681936387278, "frac_reward_zero_std": 0.0, "grad_norm": 2.532740380629755, "kl": 0.006282806396484375, "learning_rate": 1.6774325539685153e-07, "loss": -0.0049, "num_tokens": 184265388.0, "reward": -7.450580596923828e-09, "reward_std": 1.044877052307129, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.21073724471104419, "rewards/wordcountpos_reward/raw_geo/std": 0.1958907824583539, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1223.25, "completions/mean_terminated_length": 1204.800048828125, "completions/min_length": 954.0, "completions/min_terminated_length": 954.0, "epoch": 0.8411682336467293, "frac_reward_zero_std": 0.0, "grad_norm": 3.4853422978803485, "kl": 0.0164642333984375, "learning_rate": 1.6757754438816797e-07, "loss": 0.0267, "num_tokens": 184310328.0, "reward": 0.0, "reward_std": 0.8809579610824585, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11727343211512725, "rewards/wordcountpos_reward/raw_geo/std": 0.17227616704153395, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1056.625, "completions/mean_terminated_length": 1056.625, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.841368273654731, "frac_reward_zero_std": 0.0, "grad_norm": 2.8027102825295005, "kl": 0.0156707763671875, "learning_rate": 1.6741201985041044e-07, "loss": -0.0064, "num_tokens": 184348306.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6755726933479309, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07001904554247086, "rewards/wordcountpos_reward/raw_geo/std": 0.10005711480014473, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1265.4375, "completions/mean_terminated_length": 1231.9285888671875, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.8415683136627325, "frac_reward_zero_std": 0.0, "grad_norm": 2.806294179902459, "kl": 0.01361083984375, "learning_rate": 1.672466818642891e-07, "loss": 0.0221, "num_tokens": 184393937.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9329041838645935, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.046482647770613936, "rewards/wordcountpos_reward/raw_geo/std": 0.07889659674487146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 1046.8125, "completions/mean_terminated_length": 1046.8125, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.8417683536707341, "frac_reward_zero_std": 0.0, "grad_norm": 2.9907084740281693, "kl": 0.016357421875, "learning_rate": 1.6708153051042396e-07, "loss": -0.0083, "num_tokens": 184438094.0, "reward": 0.0, "reward_std": 0.9689133167266846, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11715412547823066, "rewards/wordcountpos_reward/raw_geo/std": 0.13367177334839467, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1232.8125, "completions/mean_terminated_length": 1194.6429443359375, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.8419683936787358, "frac_reward_zero_std": 0.0, "grad_norm": 2.7435264091648865, "kl": 0.019439697265625, "learning_rate": 1.669165658693432e-07, "loss": -0.014, "num_tokens": 184476659.0, "reward": 0.0, "reward_std": 0.9619782567024231, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.047400809804167957, "rewards/wordcountpos_reward/raw_geo/std": 0.07452611326310586, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1194.875, "completions/mean_terminated_length": 1194.875, "completions/min_length": 932.0, "completions/min_terminated_length": 932.0, "epoch": 0.8421684336867373, "frac_reward_zero_std": 0.0, "grad_norm": 3.206735201677164, "kl": 0.0172119140625, "learning_rate": 1.6675178802148458e-07, "loss": -0.0034, "num_tokens": 184513793.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9338348507881165, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2669559482649909, "rewards/wordcountpos_reward/raw_geo/std": 0.3285173949516111, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07084150279686703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1339.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1107.0625, "completions/mean_terminated_length": 1107.0625, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.842368473694739, "frac_reward_zero_std": 0.0, "grad_norm": 3.483964252893668, "kl": 0.019561767578125, "learning_rate": 1.6658719704719422e-07, "loss": -0.0071, "num_tokens": 184558842.0, "reward": 0.0, "reward_std": 1.0527150630950928, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03922980066603428, "rewards/wordcountpos_reward/raw_geo/std": 0.05851609129844432, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1102186379345533, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1423.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1146.0, "completions/mean_terminated_length": 1146.0, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.8425685137027406, "frac_reward_zero_std": 0.0, "grad_norm": 2.6630398933551764, "kl": 0.0174407958984375, "learning_rate": 1.6642279302672786e-07, "loss": -0.0314, "num_tokens": 184597090.0, "reward": 0.0, "reward_std": 0.7795137166976929, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1048210648792832, "rewards/wordcountpos_reward/raw_geo/std": 0.08809429810361469, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 995.6875, "completions/mean_terminated_length": 995.6875, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.8427685537107421, "frac_reward_zero_std": 0.0, "grad_norm": 3.565665827764114, "kl": 0.0168914794921875, "learning_rate": 1.6625857604024924e-07, "loss": 0.0333, "num_tokens": 184638157.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0388805866241455, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06134512654289152, "rewards/wordcountpos_reward/raw_geo/std": 0.057731292842149465, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1295.0, "completions/mean_terminated_length": 1281.3333740234375, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.8429685937187438, "frac_reward_zero_std": 0.0, "grad_norm": 2.2298953061834146, "kl": 0.014739990234375, "learning_rate": 1.660945461678315e-07, "loss": -0.0469, "num_tokens": 184691517.0, "reward": 5.960464477539063e-08, "reward_std": 0.658831000328064, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11324050640336993, "rewards/wordcountpos_reward/raw_geo/std": 0.055353663644602424, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1310.5625, "completions/mean_terminated_length": 1283.5, "completions/min_length": 1090.0, "completions/min_terminated_length": 1090.0, "epoch": 0.8431686337267453, "frac_reward_zero_std": 0.0, "grad_norm": 2.8944614735126497, "kl": 0.015899658203125, "learning_rate": 1.659307034894563e-07, "loss": 0.0022, "num_tokens": 184734374.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0123498439788818, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10645645392754402, "rewards/wordcountpos_reward/raw_geo/std": 0.08356177517735196, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1264.5625, "completions/mean_terminated_length": 1157.5455322265625, "completions/min_length": 965.0, "completions/min_terminated_length": 965.0, "epoch": 0.843368673734747, "frac_reward_zero_std": 0.0, "grad_norm": 2.8858189476905065, "kl": 0.0140533447265625, "learning_rate": 1.6576704808501426e-07, "loss": 0.0131, "num_tokens": 184784759.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7853432893753052, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08711031898155477, "rewards/wordcountpos_reward/raw_geo/std": 0.23849619570719685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279466, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1123.5, "completions/mean_terminated_length": 1123.5, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.8435687137427486, "frac_reward_zero_std": 0.0, "grad_norm": 2.9900039201454476, "kl": 0.0166168212890625, "learning_rate": 1.656035800343042e-07, "loss": -0.0096, "num_tokens": 184821839.0, "reward": 3.725290298461914e-09, "reward_std": 1.0591914653778076, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.023139936886901833, "rewards/wordcountpos_reward/raw_geo/std": 0.12884070020865665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 978.75, "completions/mean_terminated_length": 978.75, "completions/min_length": 681.0, "completions/min_terminated_length": 681.0, "epoch": 0.8437687537507501, "frac_reward_zero_std": 0.0, "grad_norm": 3.8736875276511284, "kl": 0.01934814453125, "learning_rate": 1.6544029941703403e-07, "loss": -0.0598, "num_tokens": 184849979.0, "reward": 0.0, "reward_std": 0.7135446071624756, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.043986951518686666, "rewards/wordcountpos_reward/raw_geo/std": 0.056547951724611065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717427, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1156.0, "completions/max_terminated_length": 1156.0, "completions/mean_length": 952.375, "completions/mean_terminated_length": 952.375, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.8439687937587518, "frac_reward_zero_std": 0.0, "grad_norm": 4.081674282903824, "kl": 0.021881103515625, "learning_rate": 1.652772063128201e-07, "loss": -0.0185, "num_tokens": 184890529.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9565083384513855, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.5062828503448303, "rewards/wordcountpos_reward/raw_geo/std": 0.41104102071029863, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1468.9375, "completions/mean_terminated_length": 1417.166748046875, "completions/min_length": 1342.0, "completions/min_terminated_length": 1342.0, "epoch": 0.8441688337667533, "frac_reward_zero_std": 0.0, "grad_norm": 2.7909981421678043, "kl": 0.01617431640625, "learning_rate": 1.6511430080118754e-07, "loss": 0.0014, "num_tokens": 184947512.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7584692239761353, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09794054788635326, "rewards/wordcountpos_reward/raw_geo/std": 0.3276004829502516, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1257.375, "completions/mean_terminated_length": 1111.800048828125, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.844368873774755, "frac_reward_zero_std": 0.0, "grad_norm": 2.9589987718988384, "kl": 0.018951416015625, "learning_rate": 1.6495158296156946e-07, "loss": -0.0078, "num_tokens": 184991566.0, "reward": -1.4901161193847656e-08, "reward_std": 0.907779335975647, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.051469383847208144, "rewards/wordcountpos_reward/raw_geo/std": 0.2385497481381964, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1132.375, "completions/mean_terminated_length": 1107.86669921875, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.8445689137827566, "frac_reward_zero_std": 0.0, "grad_norm": 3.247076790331538, "kl": 0.01971435546875, "learning_rate": 1.6478905287330803e-07, "loss": -0.0099, "num_tokens": 185026156.0, "reward": 0.0, "reward_std": 0.5914257168769836, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006377694668615544, "rewards/wordcountpos_reward/raw_geo/std": 0.05843934693856194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402215, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 1048.8125, "completions/mean_terminated_length": 1048.8125, "completions/min_length": 834.0, "completions/min_terminated_length": 834.0, "epoch": 0.8447689537907581, "frac_reward_zero_std": 0.0, "grad_norm": 3.66081407413695, "kl": 0.018707275390625, "learning_rate": 1.6462671061565358e-07, "loss": 0.0536, "num_tokens": 185070097.0, "reward": 1.862645149230957e-08, "reward_std": 1.040417194366455, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07243112429020321, "rewards/wordcountpos_reward/raw_geo/std": 0.1355905938673456, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1214.125, "completions/mean_terminated_length": 1195.0667724609375, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.8449689937987598, "frac_reward_zero_std": 0.0, "grad_norm": 3.1951870100073636, "kl": 0.01861572265625, "learning_rate": 1.6446455626776513e-07, "loss": -0.0395, "num_tokens": 185120939.0, "reward": 0.0, "reward_std": 0.8109018206596375, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03429090232185858, "rewards/wordcountpos_reward/raw_geo/std": 0.15173259922789503, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1383.125, "completions/mean_terminated_length": 1344.166748046875, "completions/min_length": 1068.0, "completions/min_terminated_length": 1068.0, "epoch": 0.8451690338067613, "frac_reward_zero_std": 0.0, "grad_norm": 2.9058610794820336, "kl": 0.0169219970703125, "learning_rate": 1.6430258990870958e-07, "loss": 0.0095, "num_tokens": 185169797.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9499474763870239, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10924342787036953, "rewards/wordcountpos_reward/raw_geo/std": 0.054949784632567514, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 1122.25, "completions/mean_terminated_length": 1097.0667724609375, "completions/min_length": 610.0, "completions/min_terminated_length": 610.0, "epoch": 0.845369073814763, "frac_reward_zero_std": 0.0, "grad_norm": 3.3893319838023035, "kl": 0.01922607421875, "learning_rate": 1.6414081161746263e-07, "loss": 0.0377, "num_tokens": 185208121.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0236995220184326, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.038972402486544704, "rewards/wordcountpos_reward/raw_geo/std": 0.11698261109027211, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1458.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1034.9375, "completions/mean_terminated_length": 1034.9375, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.8455691138227646, "frac_reward_zero_std": 0.0, "grad_norm": 3.618674712408878, "kl": 0.0162200927734375, "learning_rate": 1.6397922147290805e-07, "loss": 0.0336, "num_tokens": 185250032.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9203010201454163, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.023404517627023193, "rewards/wordcountpos_reward/raw_geo/std": 0.13329214906628253, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1039.4375, "completions/mean_terminated_length": 1039.4375, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.8457691538307661, "frac_reward_zero_std": 0.0, "grad_norm": 3.3202372642726172, "kl": 0.01219940185546875, "learning_rate": 1.638178195538381e-07, "loss": -0.0234, "num_tokens": 185295959.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4270082712173462, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15038438187747186, "rewards/wordcountpos_reward/raw_geo/std": 0.21102450148432791, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.11417984514369003, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1108.1875, "completions/mean_terminated_length": 1082.0667724609375, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.8459691938387678, "frac_reward_zero_std": 0.0, "grad_norm": 2.8306647061266657, "kl": 0.016998291015625, "learning_rate": 1.6365660593895272e-07, "loss": 0.0251, "num_tokens": 185330626.0, "reward": 0.0, "reward_std": 0.7596127986907959, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12818389493668295, "rewards/wordcountpos_reward/raw_geo/std": 0.08020489378766785, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1284.0, "completions/max_terminated_length": 1284.0, "completions/mean_length": 1174.5, "completions/mean_terminated_length": 1174.5, "completions/min_length": 1048.0, "completions/min_terminated_length": 1048.0, "epoch": 0.8461692338467693, "frac_reward_zero_std": 0.0, "grad_norm": 2.4624705266997453, "kl": 0.011871337890625, "learning_rate": 1.634955807068609e-07, "loss": -0.0185, "num_tokens": 185374994.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8630352020263672, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.057336679958770884, "rewards/wordcountpos_reward/raw_geo/std": 0.07403286686088428, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066473, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1023.1875, "completions/mean_terminated_length": 1023.1875, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.846369273854771, "frac_reward_zero_std": 0.0, "grad_norm": 3.5710142062157284, "kl": 0.0169830322265625, "learning_rate": 1.6333474393607882e-07, "loss": -0.0138, "num_tokens": 185426581.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7732509970664978, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06279585103055803, "rewards/wordcountpos_reward/raw_geo/std": 0.050573311623410076, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1204.5, "completions/mean_terminated_length": 1070.181884765625, "completions/min_length": 850.0, "completions/min_terminated_length": 850.0, "epoch": 0.8465693138627726, "frac_reward_zero_std": 0.0, "grad_norm": 7.639687181397856, "kl": 0.0825958251953125, "learning_rate": 1.6317409570503142e-07, "loss": -0.0211, "num_tokens": 185472413.0, "reward": 0.0, "reward_std": 1.0182019472122192, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04178581074926172, "rewards/wordcountpos_reward/raw_geo/std": 0.07938473523409269, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1202.8125, "completions/mean_terminated_length": 1202.8125, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.8467693538707741, "frac_reward_zero_std": 0.0, "grad_norm": 3.0901346571515287, "kl": 0.016876220703125, "learning_rate": 1.6301363609205158e-07, "loss": 0.0065, "num_tokens": 185516018.0, "reward": 0.0, "reward_std": 0.8045932054519653, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09136107144705428, "rewards/wordcountpos_reward/raw_geo/std": 0.20917851845910965, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852978, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1097.625, "completions/mean_terminated_length": 1097.625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.8469693938787758, "frac_reward_zero_std": 0.0, "grad_norm": 3.1426257238237576, "kl": 0.0194549560546875, "learning_rate": 1.6285336517538006e-07, "loss": -0.0129, "num_tokens": 185562244.0, "reward": 0.0, "reward_std": 0.9333032369613647, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09104466256855293, "rewards/wordcountpos_reward/raw_geo/std": 0.08615881977030623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1051.75, "completions/mean_terminated_length": 1021.86669921875, "completions/min_length": 617.0, "completions/min_terminated_length": 617.0, "epoch": 0.8471694338867773, "frac_reward_zero_std": 0.0, "grad_norm": 3.0908070179601457, "kl": 0.02001953125, "learning_rate": 1.6269328303316566e-07, "loss": 0.0088, "num_tokens": 185601704.0, "reward": 0.0, "reward_std": 0.9007909297943115, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09817201160394437, "rewards/wordcountpos_reward/raw_geo/std": 0.12855433645231085, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.15563490039905004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1254.25, "completions/mean_terminated_length": 1254.25, "completions/min_length": 1080.0, "completions/min_terminated_length": 1080.0, "epoch": 0.8473694738947789, "frac_reward_zero_std": 0.0, "grad_norm": 2.311009491737528, "kl": 0.0089874267578125, "learning_rate": 1.625333897434651e-07, "loss": 0.0008, "num_tokens": 185649788.0, "reward": 0.0, "reward_std": 0.9591407775878906, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2377075813166084, "rewards/wordcountpos_reward/raw_geo/std": 0.16361931906714533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1233.75, "completions/mean_terminated_length": 1145.0, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.8475695139027806, "frac_reward_zero_std": 0.0, "grad_norm": 3.219533821381868, "kl": 0.019744873046875, "learning_rate": 1.6237368538424315e-07, "loss": -0.0556, "num_tokens": 185697168.0, "reward": 0.0, "reward_std": 0.26475316286087036, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0019723641383299125, "rewards/wordcountpos_reward/raw_geo/std": 0.2990696692189127, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1241.75, "completions/mean_terminated_length": 1241.75, "completions/min_length": 1078.0, "completions/min_terminated_length": 1078.0, "epoch": 0.8477695539107821, "frac_reward_zero_std": 0.0, "grad_norm": 3.557813333005682, "kl": 0.0196533203125, "learning_rate": 1.6221417003337255e-07, "loss": 0.0084, "num_tokens": 185742868.0, "reward": 0.0, "reward_std": 0.6245682239532471, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03718983996362932, "rewards/wordcountpos_reward/raw_geo/std": 0.19340440126744937, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1237.8125, "completions/mean_terminated_length": 1200.357177734375, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.8479695939187838, "frac_reward_zero_std": 0.0, "grad_norm": 2.983214006708892, "kl": 0.0211181640625, "learning_rate": 1.6205484376863337e-07, "loss": 0.0206, "num_tokens": 185798289.0, "reward": 0.0, "reward_std": 0.8106904029846191, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06473697944433005, "rewards/wordcountpos_reward/raw_geo/std": 0.10330988394891201, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.19659132266747725, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1171.4375, "completions/mean_terminated_length": 1124.5, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.8481696339267853, "frac_reward_zero_std": 0.0, "grad_norm": 3.636698130563758, "kl": 0.020751953125, "learning_rate": 1.61895706667714e-07, "loss": -0.0209, "num_tokens": 185844592.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8303330540657043, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004103220572583926, "rewards/wordcountpos_reward/raw_geo/std": 0.2978434716442249, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1310.3125, "completions/mean_terminated_length": 1224.0909423828125, "completions/min_length": 931.0, "completions/min_terminated_length": 931.0, "epoch": 0.8483696739347869, "frac_reward_zero_std": 0.0, "grad_norm": 2.852923865252584, "kl": 0.0150909423828125, "learning_rate": 1.6173675880821045e-07, "loss": -0.0241, "num_tokens": 185892221.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0261778831481934, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.6095104705440308, "rewards/wordcountpos_reward/raw_geo/std": 0.3688281595922464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1264911064067352, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 1048.5, "completions/mean_terminated_length": 1048.5, "completions/min_length": 856.0, "completions/min_terminated_length": 856.0, "epoch": 0.8485697139427886, "frac_reward_zero_std": 0.0, "grad_norm": 3.561798895348104, "kl": 0.0205078125, "learning_rate": 1.6157800026762646e-07, "loss": -0.0013, "num_tokens": 185935701.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5566320419311523, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06574714061979096, "rewards/wordcountpos_reward/raw_geo/std": 0.07906336582253505, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852977, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1133.0, "completions/max_terminated_length": 1133.0, "completions/mean_length": 892.25, "completions/mean_terminated_length": 892.25, "completions/min_length": 625.0, "completions/min_terminated_length": 625.0, "epoch": 0.8487697539507901, "frac_reward_zero_std": 0.0, "grad_norm": 3.8047416791670523, "kl": 0.0181121826171875, "learning_rate": 1.614194311233732e-07, "loss": -0.0155, "num_tokens": 185973825.0, "reward": 0.0, "reward_std": 0.6174795031547546, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0921139571468311, "rewards/wordcountpos_reward/raw_geo/std": 0.1961595479233047, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1228.375, "completions/mean_terminated_length": 1210.2667236328125, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.8489697939587918, "frac_reward_zero_std": 0.0, "grad_norm": 2.717487092890986, "kl": 0.013458251953125, "learning_rate": 1.612610514527698e-07, "loss": 0.0341, "num_tokens": 186027383.0, "reward": 0.0, "reward_std": 0.9279111623764038, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.039287689906791656, "rewards/wordcountpos_reward/raw_geo/std": 0.1455105956839294, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1247.3125, "completions/mean_terminated_length": 1211.21435546875, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.8491698339667934, "frac_reward_zero_std": 0.0, "grad_norm": 2.560462325913471, "kl": 0.0128173828125, "learning_rate": 1.611028613330431e-07, "loss": 0.0307, "num_tokens": 186081444.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4450644254684448, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.023472088204412874, "rewards/wordcountpos_reward/raw_geo/std": 0.08722224724435777, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316066, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 1003.25, "completions/mean_terminated_length": 970.1333618164062, "completions/min_length": 645.0, "completions/min_terminated_length": 645.0, "epoch": 0.8493698739747949, "frac_reward_zero_std": 0.0, "grad_norm": 3.5713539820489704, "kl": 0.0163421630859375, "learning_rate": 1.6094486084132705e-07, "loss": -0.054, "num_tokens": 186110528.0, "reward": 0.0, "reward_std": 1.0426735877990723, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0768982422111562, "rewards/wordcountpos_reward/raw_geo/std": 0.1673338942169055, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333333, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1456.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1096.3125, "completions/mean_terminated_length": 1096.3125, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.8495699139827966, "frac_reward_zero_std": 0.0, "grad_norm": 2.021881995418864, "kl": 0.0081634521484375, "learning_rate": 1.607870500546635e-07, "loss": 0.0016, "num_tokens": 186156389.0, "reward": 0.0, "reward_std": 0.8262585997581482, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.007073268512913696, "rewards/wordcountpos_reward/raw_geo/std": 0.11577557173122423, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 1012.0, "completions/mean_terminated_length": 1012.0, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.8497699539907981, "frac_reward_zero_std": 0.0, "grad_norm": 2.7702457447710227, "kl": 0.012176513671875, "learning_rate": 1.6062942905000167e-07, "loss": 0.0114, "num_tokens": 186197773.0, "reward": 0.0, "reward_std": 0.4717167615890503, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09769481258496451, "rewards/wordcountpos_reward/raw_geo/std": 0.1272162139753259, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1341.625, "completions/mean_terminated_length": 1305.0770263671875, "completions/min_length": 1132.0, "completions/min_terminated_length": 1132.0, "epoch": 0.8499699939987998, "frac_reward_zero_std": 0.0, "grad_norm": 3.088877692231369, "kl": 0.0160369873046875, "learning_rate": 1.6047199790419854e-07, "loss": 0.0123, "num_tokens": 186253503.0, "reward": -7.450580596923828e-09, "reward_std": 1.0363705158233643, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1173015688386994, "rewards/wordcountpos_reward/raw_geo/std": 0.19477610949919955, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1247.0, "completions/max_terminated_length": 1247.0, "completions/mean_length": 884.4375, "completions/mean_terminated_length": 884.4375, "completions/min_length": 497.0, "completions/min_terminated_length": 497.0, "epoch": 0.8501700340068014, "frac_reward_zero_std": 0.0, "grad_norm": 4.0669441148003695, "kl": 0.016357421875, "learning_rate": 1.603147566940179e-07, "loss": 0.0443, "num_tokens": 186285270.0, "reward": 0.0, "reward_std": 0.8601611256599426, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.19270641818277848, "rewards/wordcountpos_reward/raw_geo/std": 0.20475173092562418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07588978362901862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1387.4375, "completions/mean_terminated_length": 1299.888916015625, "completions/min_length": 1086.0, "completions/min_terminated_length": 1086.0, "epoch": 0.8503700740148029, "frac_reward_zero_std": 0.0, "grad_norm": 2.226382751547088, "kl": 0.012359619140625, "learning_rate": 1.601577054961314e-07, "loss": -0.008, "num_tokens": 186334213.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0563294887542725, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02887788334158703, "rewards/wordcountpos_reward/raw_geo/std": 0.06498778030914137, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.0957427107756338, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1406.0, "completions/mean_length": 1238.1875, "completions/mean_terminated_length": 1177.769287109375, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.8505701140228046, "frac_reward_zero_std": 0.0, "grad_norm": 2.697188908020762, "kl": 0.017486572265625, "learning_rate": 1.6000084438711793e-07, "loss": -0.0005, "num_tokens": 186382336.0, "reward": 3.725290298461914e-09, "reward_std": 0.9985430836677551, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.056993701594707784, "rewards/wordcountpos_reward/raw_geo/std": 0.09372194820873606, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0687184270936277, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1203.0, "completions/mean_terminated_length": 1203.0, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.8507701540308061, "frac_reward_zero_std": 0.0, "grad_norm": 3.0115147100252773, "kl": 0.0194854736328125, "learning_rate": 1.5984417344346379e-07, "loss": -0.0619, "num_tokens": 186425096.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9739514589309692, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03907749838259163, "rewards/wordcountpos_reward/raw_geo/std": 0.1475170772891042, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1302.375, "completions/mean_terminated_length": 1256.769287109375, "completions/min_length": 1059.0, "completions/min_terminated_length": 1059.0, "epoch": 0.8509701940388078, "frac_reward_zero_std": 0.0, "grad_norm": 2.6305209478374447, "kl": 0.0157928466796875, "learning_rate": 1.5968769274156217e-07, "loss": 0.0083, "num_tokens": 186468390.0, "reward": 0.0, "reward_std": 0.6631268262863159, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17450588961015223, "rewards/wordcountpos_reward/raw_geo/std": 0.23205631261301526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1315.75, "completions/mean_terminated_length": 1303.4666748046875, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.8511702340468094, "frac_reward_zero_std": 0.0, "grad_norm": 2.904932339425578, "kl": 0.017547607421875, "learning_rate": 1.5953140235771411e-07, "loss": 0.0278, "num_tokens": 186515450.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8783310651779175, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04796440369889608, "rewards/wordcountpos_reward/raw_geo/std": 0.14319253924420325, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1128.25, "completions/mean_terminated_length": 1103.4666748046875, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.8513702740548109, "frac_reward_zero_std": 0.0, "grad_norm": 3.4733951619444774, "kl": 0.016876220703125, "learning_rate": 1.5937530236812724e-07, "loss": 0.0092, "num_tokens": 186557742.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9855725765228271, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.032420723051446, "rewards/wordcountpos_reward/raw_geo/std": 0.10137659871710243, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1252.3125, "completions/mean_terminated_length": 1235.800048828125, "completions/min_length": 1044.0, "completions/min_terminated_length": 1044.0, "epoch": 0.8515703140628126, "frac_reward_zero_std": 0.0, "grad_norm": 2.7422115249420327, "kl": 0.01181793212890625, "learning_rate": 1.592193928489168e-07, "loss": -0.04, "num_tokens": 186605587.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6056020855903625, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03617867788127297, "rewards/wordcountpos_reward/raw_geo/std": 0.07671836005783819, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1256.1875, "completions/mean_terminated_length": 1109.9000244140625, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.8517703540708141, "frac_reward_zero_std": 0.0, "grad_norm": 3.046882106349635, "kl": 0.019287109375, "learning_rate": 1.5906367387610476e-07, "loss": 0.0057, "num_tokens": 186654854.0, "reward": -2.9802322387695312e-08, "reward_std": 0.952633261680603, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.041031860199658834, "rewards/wordcountpos_reward/raw_geo/std": 0.052635361347324376, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1443.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1201.3125, "completions/mean_terminated_length": 1201.3125, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.8519703940788158, "frac_reward_zero_std": 0.0, "grad_norm": 3.118493683450591, "kl": 0.0141448974609375, "learning_rate": 1.5890814552562074e-07, "loss": -0.0339, "num_tokens": 186704187.0, "reward": 2.9802322387695312e-08, "reward_std": 1.057565689086914, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.015188166950536802, "rewards/wordcountpos_reward/raw_geo/std": 0.15415917684262875, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1218.8125, "completions/mean_terminated_length": 1200.0667724609375, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.8521704340868174, "frac_reward_zero_std": 0.0, "grad_norm": 3.3082579157562555, "kl": 0.0223388671875, "learning_rate": 1.587528078733007e-07, "loss": -0.0042, "num_tokens": 186746336.0, "reward": -5.960464477539063e-08, "reward_std": 0.781602144241333, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11362564397563234, "rewards/wordcountpos_reward/raw_geo/std": 0.23290513177382524, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1443.0, "completions/mean_length": 1155.125, "completions/mean_terminated_length": 1132.1334228515625, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.8523704740948189, "frac_reward_zero_std": 0.0, "grad_norm": 3.1553200633538694, "kl": 0.019317626953125, "learning_rate": 1.5859766099488823e-07, "loss": -0.0206, "num_tokens": 186779322.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9145583510398865, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07361645305799007, "rewards/wordcountpos_reward/raw_geo/std": 0.061091925845603995, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1323.8125, "completions/mean_terminated_length": 1283.1539306640625, "completions/min_length": 1069.0, "completions/min_terminated_length": 1069.0, "epoch": 0.8525705141028206, "frac_reward_zero_std": 0.0, "grad_norm": 2.985472056653899, "kl": 0.0183868408203125, "learning_rate": 1.5844270496603358e-07, "loss": 0.0148, "num_tokens": 186822791.0, "reward": 0.0, "reward_std": 0.7241206169128418, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12582797232787257, "rewards/wordcountpos_reward/raw_geo/std": 0.1653241331855506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1051.8125, "completions/mean_terminated_length": 1051.8125, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.8527705541108221, "frac_reward_zero_std": 0.0, "grad_norm": 3.6514461879068483, "kl": 0.02313232421875, "learning_rate": 1.5828793986229406e-07, "loss": -0.0043, "num_tokens": 186871892.0, "reward": 0.0, "reward_std": 0.42495203018188477, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10109573215979552, "rewards/wordcountpos_reward/raw_geo/std": 0.19209501240252858, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1134.8125, "completions/mean_terminated_length": 1134.8125, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.8529705941188238, "frac_reward_zero_std": 0.0, "grad_norm": 3.4244022471910123, "kl": 0.017791748046875, "learning_rate": 1.5813336575913372e-07, "loss": 0.0188, "num_tokens": 186912513.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7991480827331543, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00023486889688514433, "rewards/wordcountpos_reward/raw_geo/std": 0.05988669294492063, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238703, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1090.5625, "completions/mean_terminated_length": 1063.2667236328125, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.8531706341268254, "frac_reward_zero_std": 0.0, "grad_norm": 2.3643624924947884, "kl": 0.009273529052734375, "learning_rate": 1.5797898273192366e-07, "loss": -0.0115, "num_tokens": 186946762.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6663957238197327, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09120612627746677, "rewards/wordcountpos_reward/raw_geo/std": 0.051074235787489326, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1285.375, "completions/mean_terminated_length": 1285.375, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.8533706741348269, "frac_reward_zero_std": 0.0, "grad_norm": 1.9544499547736618, "kl": 0.009777069091796875, "learning_rate": 1.5782479085594175e-07, "loss": -0.01, "num_tokens": 186993696.0, "reward": 0.0, "reward_std": 0.7839667201042175, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11135605058458255, "rewards/wordcountpos_reward/raw_geo/std": 0.11766450226605914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.12041594578792296, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1363.4375, "completions/mean_terminated_length": 1343.9285888671875, "completions/min_length": 1212.0, "completions/min_terminated_length": 1212.0, "epoch": 0.8535707141428286, "frac_reward_zero_std": 0.0, "grad_norm": 2.85998436199332, "kl": 0.0144195556640625, "learning_rate": 1.5767079020637274e-07, "loss": 0.02, "num_tokens": 187049319.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9168845415115356, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10433793431664905, "rewards/wordcountpos_reward/raw_geo/std": 0.17900168364046298, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1231.0, "completions/mean_terminated_length": 1192.571533203125, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.8537707541508301, "frac_reward_zero_std": 0.0, "grad_norm": 3.4751882929589835, "kl": 0.02008056640625, "learning_rate": 1.5751698085830788e-07, "loss": 0.0047, "num_tokens": 187090351.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9942353963851929, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08379981637475323, "rewards/wordcountpos_reward/raw_geo/std": 0.07919323557715947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1068.75, "completions/mean_terminated_length": 1068.75, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.8539707941588318, "frac_reward_zero_std": 0.0, "grad_norm": 2.860662612550725, "kl": 0.0108489990234375, "learning_rate": 1.573633628867454e-07, "loss": -0.0216, "num_tokens": 187127955.0, "reward": -3.725290298461914e-09, "reward_std": 1.060046672821045, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.022901460265698264, "rewards/wordcountpos_reward/raw_geo/std": 0.07502629773812658, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.0958393717904348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1227.0625, "completions/mean_terminated_length": 1136.0833740234375, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.8541708341668334, "frac_reward_zero_std": 0.0, "grad_norm": 2.7459277075146846, "kl": 0.0144500732421875, "learning_rate": 1.5720993636659006e-07, "loss": -0.1056, "num_tokens": 187182404.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0389059782028198, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.041829035790030125, "rewards/wordcountpos_reward/raw_geo/std": 0.21113980878163446, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1314.4375, "completions/mean_terminated_length": 1170.111083984375, "completions/min_length": 966.0, "completions/min_terminated_length": 966.0, "epoch": 0.8543708741748349, "frac_reward_zero_std": 0.0, "grad_norm": 3.2617865895902165, "kl": 0.0208740234375, "learning_rate": 1.5705670137265347e-07, "loss": 0.0343, "num_tokens": 187236179.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0436856746673584, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08823955309985956, "rewards/wordcountpos_reward/raw_geo/std": 0.11743251081323398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1296.875, "completions/mean_terminated_length": 1283.3333740234375, "completions/min_length": 819.0, "completions/min_terminated_length": 819.0, "epoch": 0.8545709141828366, "frac_reward_zero_std": 0.0, "grad_norm": 2.9708656824894124, "kl": 0.02166748046875, "learning_rate": 1.5690365797965354e-07, "loss": -0.0567, "num_tokens": 187284625.0, "reward": 0.0, "reward_std": 0.8005176782608032, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06560126103643957, "rewards/wordcountpos_reward/raw_geo/std": 0.09173976515944222, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1202.0625, "completions/mean_terminated_length": 1102.75, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.8547709541908382, "frac_reward_zero_std": 0.0, "grad_norm": 3.3237856675872632, "kl": 0.018890380859375, "learning_rate": 1.56750806262215e-07, "loss": -0.0026, "num_tokens": 187337514.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9058147668838501, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0684566498532278, "rewards/wordcountpos_reward/raw_geo/std": 0.17561631634590613, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1230.5625, "completions/mean_terminated_length": 1230.5625, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.8549709941988398, "frac_reward_zero_std": 0.0, "grad_norm": 2.4011508082825626, "kl": 0.011474609375, "learning_rate": 1.5659814629486896e-07, "loss": -0.0032, "num_tokens": 187383275.0, "reward": -1.4901161193847656e-08, "reward_std": 0.992708683013916, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02861808646842802, "rewards/wordcountpos_reward/raw_geo/std": 0.10880837115780764, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202952, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1162.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 952.125, "completions/mean_terminated_length": 952.125, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.8551710342068414, "frac_reward_zero_std": 0.0, "grad_norm": 3.3005863655120407, "kl": 0.0189361572265625, "learning_rate": 1.5644567815205334e-07, "loss": 0.0158, "num_tokens": 187423117.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5549744367599487, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22885876703995883, "rewards/wordcountpos_reward/raw_geo/std": 0.12175726192595074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1187.9375, "completions/mean_terminated_length": 1167.1334228515625, "completions/min_length": 981.0, "completions/min_terminated_length": 981.0, "epoch": 0.8553710742148429, "frac_reward_zero_std": 0.0, "grad_norm": 3.191214335462932, "kl": 0.01654052734375, "learning_rate": 1.5629340190811192e-07, "loss": -0.0037, "num_tokens": 187466172.0, "reward": 0.0, "reward_std": 0.9753879308700562, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04495714534799633, "rewards/wordcountpos_reward/raw_geo/std": 0.04788419697738727, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1190.0, "completions/mean_terminated_length": 1190.0, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.8555711142228446, "frac_reward_zero_std": 0.0, "grad_norm": 3.1040110047931924, "kl": 0.018585205078125, "learning_rate": 1.5614131763729576e-07, "loss": -0.0242, "num_tokens": 187511292.0, "reward": 7.450580596923828e-09, "reward_std": 1.045559287071228, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.019628143234036128, "rewards/wordcountpos_reward/raw_geo/std": 0.1248011790618032, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1121.5625, "completions/mean_terminated_length": 1121.5625, "completions/min_length": 857.0, "completions/min_terminated_length": 857.0, "epoch": 0.8557711542308462, "frac_reward_zero_std": 0.0, "grad_norm": 2.9892595448986516, "kl": 0.0160064697265625, "learning_rate": 1.5598942541376145e-07, "loss": 0.0034, "num_tokens": 187555077.0, "reward": 0.0, "reward_std": 0.7768270969390869, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.007348372449377117, "rewards/wordcountpos_reward/raw_geo/std": 0.0601718276703609, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1298.0, "completions/mean_terminated_length": 1298.0, "completions/min_length": 1117.0, "completions/min_terminated_length": 1117.0, "epoch": 0.8559711942388478, "frac_reward_zero_std": 0.0, "grad_norm": 2.9990036388170958, "kl": 0.0165252685546875, "learning_rate": 1.558377253115726e-07, "loss": 0.0099, "num_tokens": 187604821.0, "reward": 0.0, "reward_std": 1.0463333129882812, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1753782981614304, "rewards/wordcountpos_reward/raw_geo/std": 0.22921213398217344, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666667, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1261.6875, "completions/mean_terminated_length": 1206.6923828125, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.8561712342468494, "frac_reward_zero_std": 0.0, "grad_norm": 2.9890439696517173, "kl": 0.01641845703125, "learning_rate": 1.5568621740469855e-07, "loss": 0.0077, "num_tokens": 187653328.0, "reward": 0.0, "reward_std": 0.7754864692687988, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1734356811465864, "rewards/wordcountpos_reward/raw_geo/std": 0.07325535827868264, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1153.5, "completions/mean_terminated_length": 1153.5, "completions/min_length": 817.0, "completions/min_terminated_length": 817.0, "epoch": 0.8563712742548509, "frac_reward_zero_std": 0.0, "grad_norm": 3.242733772783805, "kl": 0.01678466796875, "learning_rate": 1.555349017670156e-07, "loss": -0.029, "num_tokens": 187694224.0, "reward": 0.0, "reward_std": 0.7155172228813171, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09492224260484379, "rewards/wordcountpos_reward/raw_geo/std": 0.09834224451592491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1040.9375, "completions/mean_terminated_length": 1040.9375, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.8565713142628526, "frac_reward_zero_std": 0.0, "grad_norm": 3.3592848142331833, "kl": 0.0165557861328125, "learning_rate": 1.5538377847230568e-07, "loss": -0.0561, "num_tokens": 187733511.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9899550676345825, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02555945582976734, "rewards/wordcountpos_reward/raw_geo/std": 0.0663624708855156, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1165.625, "completions/mean_terminated_length": 1143.3333740234375, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.8567713542708542, "frac_reward_zero_std": 0.0, "grad_norm": 2.917731105579506, "kl": 0.0173492431640625, "learning_rate": 1.552328475942573e-07, "loss": 0.0287, "num_tokens": 187781641.0, "reward": 0.0, "reward_std": 0.9111736416816711, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02123112289181416, "rewards/wordcountpos_reward/raw_geo/std": 0.05450617625777462, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10671873729054748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1290.5, "completions/mean_terminated_length": 1276.533447265625, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.8569713942788558, "frac_reward_zero_std": 0.0, "grad_norm": 2.4232606139577166, "kl": 0.010833740234375, "learning_rate": 1.5508210920646506e-07, "loss": -0.0316, "num_tokens": 187825569.0, "reward": 5.960464477539063e-08, "reward_std": 0.5191792249679565, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016113125656748647, "rewards/wordcountpos_reward/raw_geo/std": 0.27759527237917514, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.14375905768565217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1025.6875, "completions/mean_terminated_length": 994.0667114257812, "completions/min_length": 691.0, "completions/min_terminated_length": 691.0, "epoch": 0.8571714342868574, "frac_reward_zero_std": 0.0, "grad_norm": 3.457618278640866, "kl": 0.0185546875, "learning_rate": 1.5493156338242971e-07, "loss": -0.0582, "num_tokens": 187871436.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8800491094589233, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09455168467355188, "rewards/wordcountpos_reward/raw_geo/std": 0.14886293921634428, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1151.375, "completions/mean_terminated_length": 1151.375, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.8573714742948589, "frac_reward_zero_std": 0.0, "grad_norm": 2.906142862617549, "kl": 0.0120849609375, "learning_rate": 1.5478121019555797e-07, "loss": -0.03, "num_tokens": 187914314.0, "reward": 0.0, "reward_std": 0.7537917494773865, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0037793644352552666, "rewards/wordcountpos_reward/raw_geo/std": 0.0938453097071977, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1059.125, "completions/mean_terminated_length": 1059.125, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.8575715143028606, "frac_reward_zero_std": 0.0, "grad_norm": 2.4487153521429663, "kl": 0.0118560791015625, "learning_rate": 1.5463104971916276e-07, "loss": 0.0085, "num_tokens": 187960244.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9522835612297058, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0023574725220843083, "rewards/wordcountpos_reward/raw_geo/std": 0.15399668437128577, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1140.0625, "completions/mean_terminated_length": 1140.0625, "completions/min_length": 637.0, "completions/min_terminated_length": 637.0, "epoch": 0.8577715543108622, "frac_reward_zero_std": 0.0, "grad_norm": 2.926767199187593, "kl": 0.0166168212890625, "learning_rate": 1.5448108202646303e-07, "loss": -0.1039, "num_tokens": 188003997.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0160882472991943, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.003514559261366756, "rewards/wordcountpos_reward/raw_geo/std": 0.11373090855382267, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466156, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1060.5, "completions/mean_terminated_length": 959.0769653320312, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.8579715943188637, "frac_reward_zero_std": 0.0, "grad_norm": 2.041144035565726, "kl": 0.008209228515625, "learning_rate": 1.5433130719058385e-07, "loss": -0.0106, "num_tokens": 188052573.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0420098304748535, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1767635669735801, "rewards/wordcountpos_reward/raw_geo/std": 0.12813504282603957, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1222.375, "completions/mean_terminated_length": 1158.3077392578125, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.8581716343268654, "frac_reward_zero_std": 0.0, "grad_norm": 3.162633755877356, "kl": 0.0152587890625, "learning_rate": 1.5418172528455585e-07, "loss": -0.0399, "num_tokens": 188098971.0, "reward": 0.0, "reward_std": 1.060503602027893, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04817666737706237, "rewards/wordcountpos_reward/raw_geo/std": 0.09784907178234764, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 1063.4375, "completions/mean_terminated_length": 1063.4375, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.8583716743348669, "frac_reward_zero_std": 0.0, "grad_norm": 3.517392328482794, "kl": 0.01788330078125, "learning_rate": 1.5403233638131596e-07, "loss": 0.0056, "num_tokens": 188140330.0, "reward": 0.0, "reward_std": 0.8949387669563293, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2369812806113075, "rewards/wordcountpos_reward/raw_geo/std": 0.2243847474206915, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1196.75, "completions/mean_terminated_length": 1126.769287109375, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.8585717143428686, "frac_reward_zero_std": 0.0, "grad_norm": 2.5793502053595128, "kl": 0.0154571533203125, "learning_rate": 1.5388314055370695e-07, "loss": -0.0227, "num_tokens": 188183998.0, "reward": -9.313225746154785e-09, "reward_std": 0.9379696846008301, "rewards/wordcountpos_reward/mean": -9.313225746154785e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.010511090486111267, "rewards/wordcountpos_reward/raw_geo/std": 0.023844521539826392, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1417.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1152.1875, "completions/mean_terminated_length": 1152.1875, "completions/min_length": 746.0, "completions/min_terminated_length": 746.0, "epoch": 0.8587717543508702, "frac_reward_zero_std": 0.0, "grad_norm": 3.345453148320116, "kl": 0.019073486328125, "learning_rate": 1.5373413787447738e-07, "loss": -0.0253, "num_tokens": 188222929.0, "reward": 4.470348358154297e-08, "reward_std": 0.9587357044219971, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2862354435949561, "rewards/wordcountpos_reward/raw_geo/std": 0.34047686618932615, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1138.625, "completions/mean_terminated_length": 1114.533447265625, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.8589717943588717, "frac_reward_zero_std": 0.0, "grad_norm": 3.3953242065017957, "kl": 0.0167388916015625, "learning_rate": 1.5358532841628155e-07, "loss": -0.0671, "num_tokens": 188267003.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0409488677978516, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01723054639742368, "rewards/wordcountpos_reward/raw_geo/std": 0.06197269554422703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1350.6875, "completions/mean_terminated_length": 1234.5555419921875, "completions/min_length": 1041.0, "completions/min_terminated_length": 1041.0, "epoch": 0.8591718343668734, "frac_reward_zero_std": 0.0, "grad_norm": 2.907527845078451, "kl": 0.0171356201171875, "learning_rate": 1.5343671225167965e-07, "loss": 0.0012, "num_tokens": 188318782.0, "reward": 0.0, "reward_std": 0.7960062623023987, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06540341424692005, "rewards/wordcountpos_reward/raw_geo/std": 0.2527354138965573, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1238.125, "completions/mean_terminated_length": 1200.71435546875, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.8593718743748749, "frac_reward_zero_std": 0.0, "grad_norm": 2.89730188214079, "kl": 0.013519287109375, "learning_rate": 1.5328828945313764e-07, "loss": 0.0146, "num_tokens": 188354504.0, "reward": 5.960464477539063e-08, "reward_std": 0.5208063721656799, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1820948914547989, "rewards/wordcountpos_reward/raw_geo/std": 0.19592785496416765, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1275.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 1045.125, "completions/mean_terminated_length": 1045.125, "completions/min_length": 709.0, "completions/min_terminated_length": 709.0, "epoch": 0.8595719143828766, "frac_reward_zero_std": 0.0, "grad_norm": 3.663308443760397, "kl": 0.017913818359375, "learning_rate": 1.531400600930271e-07, "loss": -0.0682, "num_tokens": 188400154.0, "reward": -5.960464477539063e-08, "reward_std": 0.3724924325942993, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.056373473895561535, "rewards/wordcountpos_reward/raw_geo/std": 0.18524977391742703, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185828, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1066.25, "completions/mean_terminated_length": 1066.25, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.8597719543908782, "frac_reward_zero_std": 0.0, "grad_norm": 3.5430794210477443, "kl": 0.019500732421875, "learning_rate": 1.5299202424362534e-07, "loss": -0.0313, "num_tokens": 188436022.0, "reward": 0.0, "reward_std": 0.7481282949447632, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22423368457567586, "rewards/wordcountpos_reward/raw_geo/std": 0.19357540712730248, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 1089.6875, "completions/mean_terminated_length": 1062.3333740234375, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.8599719943988797, "frac_reward_zero_std": 0.0, "grad_norm": 3.4745026437550752, "kl": 0.01837158203125, "learning_rate": 1.5284418197711535e-07, "loss": -0.0126, "num_tokens": 188477105.0, "reward": 7.450580596923828e-09, "reward_std": 1.0533428192138672, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1186151158274256, "rewards/wordcountpos_reward/raw_geo/std": 0.1026754717123215, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1408.375, "completions/mean_terminated_length": 1353.4000244140625, "completions/min_length": 1190.0, "completions/min_terminated_length": 1190.0, "epoch": 0.8601720344068814, "frac_reward_zero_std": 0.0, "grad_norm": 2.708591056296263, "kl": 0.0131683349609375, "learning_rate": 1.5269653336558575e-07, "loss": -0.0298, "num_tokens": 188529191.0, "reward": 0.0, "reward_std": 0.5635997653007507, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09913708067438355, "rewards/wordcountpos_reward/raw_geo/std": 0.10068025177030075, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1087.375, "completions/mean_terminated_length": 1087.375, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.860372074414883, "frac_reward_zero_std": 0.0, "grad_norm": 3.5869656549021807, "kl": 0.017730712890625, "learning_rate": 1.5254907848103053e-07, "loss": -0.0138, "num_tokens": 188570397.0, "reward": 0.0, "reward_std": 0.7265821695327759, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10148080036905603, "rewards/wordcountpos_reward/raw_geo/std": 0.11609936492749459, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9875, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1171.3125, "completions/mean_terminated_length": 1171.3125, "completions/min_length": 662.0, "completions/min_terminated_length": 662.0, "epoch": 0.8605721144228846, "frac_reward_zero_std": 0.0, "grad_norm": 3.623070418794166, "kl": 0.0230712890625, "learning_rate": 1.524018173953494e-07, "loss": -0.0111, "num_tokens": 188619738.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8518112897872925, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12777305455967916, "rewards/wordcountpos_reward/raw_geo/std": 0.13106819946907838, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901161, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1030.0, "completions/mean_terminated_length": 1030.0, "completions/min_length": 695.0, "completions/min_terminated_length": 695.0, "epoch": 0.8607721544308862, "frac_reward_zero_std": 0.0, "grad_norm": 3.5622045994136893, "kl": 0.019805908203125, "learning_rate": 1.5225475018034756e-07, "loss": -0.0201, "num_tokens": 188667266.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9350059032440186, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09028742864050995, "rewards/wordcountpos_reward/raw_geo/std": 0.07320196831860683, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 1088.8125, "completions/mean_terminated_length": 1088.8125, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.8609721944388877, "frac_reward_zero_std": 0.0, "grad_norm": 3.4975917931438327, "kl": 0.01959228515625, "learning_rate": 1.5210787690773578e-07, "loss": -0.0261, "num_tokens": 188715079.0, "reward": 0.0, "reward_std": 0.9763508439064026, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06762501099467212, "rewards/wordcountpos_reward/raw_geo/std": 0.16210753361956334, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185828, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1377.625, "completions/mean_terminated_length": 1369.466796875, "completions/min_length": 1203.0, "completions/min_terminated_length": 1203.0, "epoch": 0.8611722344468894, "frac_reward_zero_std": 0.0, "grad_norm": 2.452988439394771, "kl": 0.01068115234375, "learning_rate": 1.5196119764912973e-07, "loss": 0.0039, "num_tokens": 188759521.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6200947761535645, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004243821783094769, "rewards/wordcountpos_reward/raw_geo/std": 0.22369966692820276, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1767505042163692, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1015.5, "completions/mean_terminated_length": 1015.5, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.861372274454891, "frac_reward_zero_std": 0.0, "grad_norm": 3.4509149046933607, "kl": 0.018951416015625, "learning_rate": 1.518147124760515e-07, "loss": -0.0369, "num_tokens": 188798649.0, "reward": 0.0, "reward_std": 0.8334959745407104, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11278309241780425, "rewards/wordcountpos_reward/raw_geo/std": 0.10724640346338694, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965647, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1280.0, "completions/max_terminated_length": 1280.0, "completions/mean_length": 1120.0625, "completions/mean_terminated_length": 1120.0625, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.8615723144628926, "frac_reward_zero_std": 0.0, "grad_norm": 3.080831975177101, "kl": 0.0167388916015625, "learning_rate": 1.5166842145992737e-07, "loss": 0.0121, "num_tokens": 188848738.0, "reward": 0.0, "reward_std": 0.4839927852153778, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07774053649063667, "rewards/wordcountpos_reward/raw_geo/std": 0.1207846148938494, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1097.5625, "completions/mean_terminated_length": 1097.5625, "completions/min_length": 770.0, "completions/min_terminated_length": 770.0, "epoch": 0.8617723544708942, "frac_reward_zero_std": 0.0, "grad_norm": 3.285956379846462, "kl": 0.018463134765625, "learning_rate": 1.5152232467208976e-07, "loss": -0.0307, "num_tokens": 188886763.0, "reward": 0.0, "reward_std": 0.6808652877807617, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.056792386090111543, "rewards/wordcountpos_reward/raw_geo/std": 0.07535152025628296, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1105.375, "completions/mean_terminated_length": 1105.375, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.8619723944788957, "frac_reward_zero_std": 0.0, "grad_norm": 3.4844829486340965, "kl": 0.01934814453125, "learning_rate": 1.513764221837761e-07, "loss": -0.0494, "num_tokens": 188927169.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9158785939216614, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17603535484519256, "rewards/wordcountpos_reward/raw_geo/std": 0.2009622515210898, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1125.5, "completions/mean_terminated_length": 1125.5, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.8621724344868974, "frac_reward_zero_std": 0.0, "grad_norm": 3.3021990069638654, "kl": 0.0185394287109375, "learning_rate": 1.5123071406612917e-07, "loss": -0.0442, "num_tokens": 188968065.0, "reward": 2.60770320892334e-08, "reward_std": 1.0677542686462402, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17329441814682703, "rewards/wordcountpos_reward/raw_geo/std": 0.16312922052902973, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1434.0, "completions/mean_length": 1182.125, "completions/mean_terminated_length": 1160.933349609375, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.862372474494899, "frac_reward_zero_std": 0.0, "grad_norm": 3.3881785641789715, "kl": 0.0187530517578125, "learning_rate": 1.5108520039019678e-07, "loss": 0.0042, "num_tokens": 189011491.0, "reward": 0.0, "reward_std": 0.9200870990753174, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.060251373772260114, "rewards/wordcountpos_reward/raw_geo/std": 0.10263121565039726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.11855612829185828, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1079.0625, "completions/mean_terminated_length": 1079.0625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.8625725145029006, "frac_reward_zero_std": 0.0, "grad_norm": 3.691523620870395, "kl": 0.019927978515625, "learning_rate": 1.5093988122693206e-07, "loss": -0.0051, "num_tokens": 189062852.0, "reward": 0.0, "reward_std": 0.7633024454116821, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17767242659214524, "rewards/wordcountpos_reward/raw_geo/std": 0.16294571207713468, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1334.6875, "completions/mean_terminated_length": 1169.375, "completions/min_length": 1014.0, "completions/min_terminated_length": 1014.0, "epoch": 0.8627725545109022, "frac_reward_zero_std": 0.0, "grad_norm": 2.442887923008232, "kl": 0.01190185546875, "learning_rate": 1.5079475664719337e-07, "loss": 0.0067, "num_tokens": 189118543.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9717979431152344, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04741098099740427, "rewards/wordcountpos_reward/raw_geo/std": 0.23256469645932623, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 1021.6875, "completions/mean_terminated_length": 1021.6875, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.8629725945189037, "frac_reward_zero_std": 0.0, "grad_norm": 3.6621252777345266, "kl": 0.02001953125, "learning_rate": 1.5064982672174425e-07, "loss": -0.0007, "num_tokens": 189144618.0, "reward": 0.0, "reward_std": 1.0187244415283203, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04632109606080201, "rewards/wordcountpos_reward/raw_geo/std": 0.09851207483970874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1196.9375, "completions/mean_terminated_length": 1196.9375, "completions/min_length": 1021.0, "completions/min_terminated_length": 1021.0, "epoch": 0.8631726345269054, "frac_reward_zero_std": 0.0, "grad_norm": 3.1467718131298605, "kl": 0.0163421630859375, "learning_rate": 1.5050509152125285e-07, "loss": 0.002, "num_tokens": 189185641.0, "reward": -2.9802322387695312e-08, "reward_std": 0.99405437707901, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0002802270171231242, "rewards/wordcountpos_reward/raw_geo/std": 0.034350430845731875, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027816, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1122.1875, "completions/mean_terminated_length": 1122.1875, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.863372674534907, "frac_reward_zero_std": 0.0, "grad_norm": 2.8558816128140787, "kl": 0.015411376953125, "learning_rate": 1.5036055111629293e-07, "loss": 0.0015, "num_tokens": 189226564.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0575404167175293, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10747083700707495, "rewards/wordcountpos_reward/raw_geo/std": 0.11525354801699897, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.0824396524513313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1413.8125, "completions/mean_terminated_length": 1362.0999755859375, "completions/min_length": 1231.0, "completions/min_terminated_length": 1231.0, "epoch": 0.8635727145429086, "frac_reward_zero_std": 0.0, "grad_norm": 2.7975593719366443, "kl": 0.01202392578125, "learning_rate": 1.5021620557734298e-07, "loss": 0.0185, "num_tokens": 189279753.0, "reward": 0.0, "reward_std": 0.9676237106323242, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16218395093038857, "rewards/wordcountpos_reward/raw_geo/std": 0.10156754926810709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1228.0, "completions/mean_terminated_length": 1189.1429443359375, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.8637727545509102, "frac_reward_zero_std": 0.0, "grad_norm": 2.115097213370927, "kl": 0.0127105712890625, "learning_rate": 1.5007205497478672e-07, "loss": 0.0156, "num_tokens": 189332145.0, "reward": 0.0, "reward_std": 0.29891180992126465, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04804627750605159, "rewards/wordcountpos_reward/raw_geo/std": 0.13829557017040775, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.13984117975602023, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1161.5, "completions/mean_terminated_length": 1161.5, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.8639727945589117, "frac_reward_zero_std": 0.0, "grad_norm": 3.440005485765127, "kl": 0.0169219970703125, "learning_rate": 1.4992809937891236e-07, "loss": -0.0006, "num_tokens": 189374601.0, "reward": 0.0, "reward_std": 0.5712407827377319, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04592205449972079, "rewards/wordcountpos_reward/raw_geo/std": 0.12300585310727905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1184.0625, "completions/mean_terminated_length": 1184.0625, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.8641728345669134, "frac_reward_zero_std": 0.0, "grad_norm": 2.9791095686053493, "kl": 0.01702880859375, "learning_rate": 1.4978433885991343e-07, "loss": 0.0122, "num_tokens": 189411450.0, "reward": 0.0, "reward_std": 0.8268992900848389, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02694116816629784, "rewards/wordcountpos_reward/raw_geo/std": 0.05875985133083417, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1334.875, "completions/mean_terminated_length": 1279.8333740234375, "completions/min_length": 1059.0, "completions/min_terminated_length": 1059.0, "epoch": 0.864372874574915, "frac_reward_zero_std": 0.0, "grad_norm": 1.908633976355302, "kl": 0.00830841064453125, "learning_rate": 1.496407734878882e-07, "loss": -0.0403, "num_tokens": 189468144.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6629046201705933, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0909386644738009, "rewards/wordcountpos_reward/raw_geo/std": 0.109236759726149, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952499, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1290.6875, "completions/mean_terminated_length": 1276.7333984375, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.8645729145829166, "frac_reward_zero_std": 0.0, "grad_norm": 2.231897865090521, "kl": 0.011505126953125, "learning_rate": 1.494974033328399e-07, "loss": 0.021, "num_tokens": 189509483.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9930434226989746, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13507541002664297, "rewards/wordcountpos_reward/raw_geo/std": 0.08460524024704692, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1056.25, "completions/mean_terminated_length": 1056.25, "completions/min_length": 839.0, "completions/min_terminated_length": 839.0, "epoch": 0.8647729545909182, "frac_reward_zero_std": 0.0, "grad_norm": 2.8279807738910545, "kl": 0.01483154296875, "learning_rate": 1.4935422846467622e-07, "loss": 0.0462, "num_tokens": 189541015.0, "reward": 0.0, "reward_std": 0.6814937591552734, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03737278312240844, "rewards/wordcountpos_reward/raw_geo/std": 0.05590798449870615, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1087.3125, "completions/mean_terminated_length": 1087.3125, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.8649729945989197, "frac_reward_zero_std": 0.0, "grad_norm": 3.3675842373247593, "kl": 0.017242431640625, "learning_rate": 1.492112489532102e-07, "loss": -0.0218, "num_tokens": 189578244.0, "reward": 0.0, "reward_std": 0.8641718626022339, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05472881974907825, "rewards/wordcountpos_reward/raw_geo/std": 0.046353241620110584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1159.3125, "completions/mean_terminated_length": 1159.3125, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.8651730346069214, "frac_reward_zero_std": 0.0, "grad_norm": 3.4624272526181454, "kl": 0.017578125, "learning_rate": 1.4906846486815907e-07, "loss": -0.021, "num_tokens": 189621865.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5249384045600891, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1726094764570387, "rewards/wordcountpos_reward/raw_geo/std": 0.20516220050222791, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1458.6875, "completions/mean_terminated_length": 1389.8333740234375, "completions/min_length": 1323.0, "completions/min_terminated_length": 1323.0, "epoch": 0.865373074614923, "frac_reward_zero_std": 0.0, "grad_norm": 2.46474542266734, "kl": 0.0159149169921875, "learning_rate": 1.4892587627914513e-07, "loss": -0.0024, "num_tokens": 189670748.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8348680138587952, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012912831190729458, "rewards/wordcountpos_reward/raw_geo/std": 0.03231848011985347, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1147.0, "completions/max_terminated_length": 1147.0, "completions/mean_length": 998.25, "completions/mean_terminated_length": 998.25, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.8655731146229246, "frac_reward_zero_std": 0.0, "grad_norm": 2.5352833286264986, "kl": 0.027130126953125, "learning_rate": 1.48783483255695e-07, "loss": -0.0372, "num_tokens": 189713224.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0357285737991333, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16661071483555057, "rewards/wordcountpos_reward/raw_geo/std": 0.08890243906191983, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1118.0, "completions/max_terminated_length": 1118.0, "completions/mean_length": 804.6875, "completions/mean_terminated_length": 804.6875, "completions/min_length": 673.0, "completions/min_terminated_length": 673.0, "epoch": 0.8657731546309262, "frac_reward_zero_std": 0.0, "grad_norm": 3.2491238307942676, "kl": 0.021148681640625, "learning_rate": 1.486412858672405e-07, "loss": -0.0143, "num_tokens": 189748843.0, "reward": 0.0, "reward_std": 0.9069240689277649, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05159769466585485, "rewards/wordcountpos_reward/raw_geo/std": 0.1062857812157035, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1138.0625, "completions/mean_terminated_length": 1138.0625, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.8659731946389277, "frac_reward_zero_std": 0.0, "grad_norm": 3.3709236206644526, "kl": 0.01953125, "learning_rate": 1.4849928418311742e-07, "loss": 0.0144, "num_tokens": 189802308.0, "reward": 2.9802322387695312e-08, "reward_std": 0.785851001739502, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.012875988863199234, "rewards/wordcountpos_reward/raw_geo/std": 0.039022851456884965, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1390.5, "completions/mean_terminated_length": 1281.0, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.8661732346469294, "frac_reward_zero_std": 0.0, "grad_norm": 2.220249098024878, "kl": 0.00974273681640625, "learning_rate": 1.4835747827256657e-07, "loss": -0.0254, "num_tokens": 189839996.0, "reward": 2.60770320892334e-08, "reward_std": 1.052979826927185, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07209786232951426, "rewards/wordcountpos_reward/raw_geo/std": 0.03418225732958945, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1119.4375, "completions/mean_terminated_length": 1094.0667724609375, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.866373274654931, "frac_reward_zero_std": 0.0, "grad_norm": 3.2130221309371523, "kl": 0.014923095703125, "learning_rate": 1.4821586820473314e-07, "loss": -0.0661, "num_tokens": 189877691.0, "reward": 0.0, "reward_std": 0.8187586665153503, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.006919155234649487, "rewards/wordcountpos_reward/raw_geo/std": 0.048357804418524226, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327551, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1253.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 1002.6875, "completions/mean_terminated_length": 1002.6875, "completions/min_length": 783.0, "completions/min_terminated_length": 783.0, "epoch": 0.8665733146629326, "frac_reward_zero_std": 0.0, "grad_norm": 2.1780617775733866, "kl": 0.01016998291015625, "learning_rate": 1.4807445404866693e-07, "loss": -0.0206, "num_tokens": 189922030.0, "reward": 0.0, "reward_std": 1.0246851444244385, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.003015124677625473, "rewards/wordcountpos_reward/raw_geo/std": 0.023219426298773528, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1441.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1139.25, "completions/mean_terminated_length": 1139.25, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.8667733546709342, "frac_reward_zero_std": 0.0, "grad_norm": 3.0312762903582144, "kl": 0.017120361328125, "learning_rate": 1.479332358733219e-07, "loss": 0.0001, "num_tokens": 189964202.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0494788885116577, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12115727412014911, "rewards/wordcountpos_reward/raw_geo/std": 0.10130546598724424, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.18459164139817943, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1087.0625, "completions/mean_terminated_length": 1087.0625, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.8669733946789357, "frac_reward_zero_std": 0.0, "grad_norm": 4.794591807523598, "kl": 0.02850341796875, "learning_rate": 1.477922137475568e-07, "loss": 0.0235, "num_tokens": 190009267.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0316047668457031, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2168176203330245, "rewards/wordcountpos_reward/raw_geo/std": 0.05462075450047496, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12224747213928167, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1386.5625, "completions/mean_terminated_length": 1273.125, "completions/min_length": 1134.0, "completions/min_terminated_length": 1134.0, "epoch": 0.8671734346869374, "frac_reward_zero_std": 0.0, "grad_norm": 1.9921914913006813, "kl": 0.008819580078125, "learning_rate": 1.4765138774013472e-07, "loss": 0.0016, "num_tokens": 190071812.0, "reward": -2.9802322387695312e-08, "reward_std": 0.43432459235191345, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0576225964518499, "rewards/wordcountpos_reward/raw_geo/std": 0.0953654829715352, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1086.25, "completions/mean_terminated_length": 1086.25, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.867373474694939, "frac_reward_zero_std": 0.0, "grad_norm": 3.5732205031421946, "kl": 0.017913818359375, "learning_rate": 1.4751075791972309e-07, "loss": 0.0361, "num_tokens": 190114976.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0095019340515137, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05472091048258514, "rewards/wordcountpos_reward/raw_geo/std": 0.09111788283289488, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1285.5625, "completions/mean_terminated_length": 1285.5625, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.8675735147029406, "frac_reward_zero_std": 0.0, "grad_norm": 3.379159382442647, "kl": 0.02081298828125, "learning_rate": 1.4737032435489345e-07, "loss": -0.0104, "num_tokens": 190161089.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9839221239089966, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013375148168090751, "rewards/wordcountpos_reward/raw_geo/std": 0.08962091629739301, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1290.8125, "completions/mean_terminated_length": 1242.5384521484375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.8677735547109422, "frac_reward_zero_std": 0.0, "grad_norm": 3.245751763250617, "kl": 0.021697998046875, "learning_rate": 1.4723008711412194e-07, "loss": -0.0115, "num_tokens": 190207342.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9512434005737305, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13531453126031912, "rewards/wordcountpos_reward/raw_geo/std": 0.16884187067319975, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1116.8125, "completions/mean_terminated_length": 1116.8125, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.8679735947189438, "frac_reward_zero_std": 0.0, "grad_norm": 3.6776326302115843, "kl": 0.019805908203125, "learning_rate": 1.470900462657889e-07, "loss": 0.0029, "num_tokens": 190243683.0, "reward": 0.0, "reward_std": 0.493010938167572, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06040119042113627, "rewards/wordcountpos_reward/raw_geo/std": 0.25680256625231174, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1060223596263578, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1484.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1129.8125, "completions/mean_terminated_length": 1129.8125, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.8681736347269454, "frac_reward_zero_std": 0.0, "grad_norm": 3.2685543632464533, "kl": 0.017913818359375, "learning_rate": 1.46950201878179e-07, "loss": -0.0039, "num_tokens": 190282760.0, "reward": 0.0, "reward_std": 1.0007719993591309, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11503614285067494, "rewards/wordcountpos_reward/raw_geo/std": 0.18117205606553166, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1223.0, "completions/mean_terminated_length": 1204.533447265625, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.868373674734947, "frac_reward_zero_std": 0.0, "grad_norm": 2.359211857303913, "kl": 0.01055145263671875, "learning_rate": 1.4681055401948075e-07, "loss": 0.0022, "num_tokens": 190325736.0, "reward": 0.0, "reward_std": 0.8504508137702942, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05457489740993146, "rewards/wordcountpos_reward/raw_geo/std": 0.11600732441105324, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1071.75, "completions/mean_terminated_length": 1071.75, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.8685737147429486, "frac_reward_zero_std": 0.0, "grad_norm": 2.682004748271509, "kl": 0.013275146484375, "learning_rate": 1.466711027577872e-07, "loss": 0.0331, "num_tokens": 190362692.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0644553899765015, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04306739618660482, "rewards/wordcountpos_reward/raw_geo/std": 0.06361859535254479, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 915.75, "completions/mean_terminated_length": 915.75, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.8687737547509502, "frac_reward_zero_std": 0.0, "grad_norm": 3.3882171485742343, "kl": 0.0162811279296875, "learning_rate": 1.4653184816109548e-07, "loss": -0.0403, "num_tokens": 190389000.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5040131211280823, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11327397007368134, "rewards/wordcountpos_reward/raw_geo/std": 0.17785119346449596, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1017.9375, "completions/mean_terminated_length": 1017.9375, "completions/min_length": 739.0, "completions/min_terminated_length": 739.0, "epoch": 0.8689737947589518, "frac_reward_zero_std": 0.0, "grad_norm": 3.8492254718892314, "kl": 0.02191162109375, "learning_rate": 1.4639279029730678e-07, "loss": -0.0367, "num_tokens": 190438639.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9348011016845703, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008910527908950203, "rewards/wordcountpos_reward/raw_geo/std": 0.050042559697674696, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1338185615204685, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1417.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1101.0625, "completions/mean_terminated_length": 1101.0625, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.8691738347669534, "frac_reward_zero_std": 0.0, "grad_norm": 2.7777105876866313, "kl": 0.0133514404296875, "learning_rate": 1.4625392923422604e-07, "loss": 0.0368, "num_tokens": 190478792.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8506228923797607, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07776776917104386, "rewards/wordcountpos_reward/raw_geo/std": 0.10652582608345462, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12041594578792297, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1484.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1122.8125, "completions/mean_terminated_length": 1122.8125, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.869373874774955, "frac_reward_zero_std": 0.0, "grad_norm": 2.981018079666783, "kl": 0.015167236328125, "learning_rate": 1.4611526503956302e-07, "loss": 0.0091, "num_tokens": 190509933.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6884787082672119, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07555527834585586, "rewards/wordcountpos_reward/raw_geo/std": 0.12966679935930145, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 966.6875, "completions/mean_terminated_length": 966.6875, "completions/min_length": 593.0, "completions/min_terminated_length": 593.0, "epoch": 0.8695739147829565, "frac_reward_zero_std": 0.0, "grad_norm": 3.164176320192437, "kl": 0.0169219970703125, "learning_rate": 1.4597679778093062e-07, "loss": -0.0613, "num_tokens": 190540632.0, "reward": 0.0, "reward_std": 0.6533827781677246, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09150188766847489, "rewards/wordcountpos_reward/raw_geo/std": 0.11862971933060278, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1276569477008451, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1228.8125, "completions/mean_terminated_length": 1190.071533203125, "completions/min_length": 1066.0, "completions/min_terminated_length": 1066.0, "epoch": 0.8697739547909582, "frac_reward_zero_std": 0.0, "grad_norm": 2.992907940782826, "kl": 0.0154571533203125, "learning_rate": 1.4583852752584638e-07, "loss": -0.0028, "num_tokens": 190574253.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5560944080352783, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14702924866775652, "rewards/wordcountpos_reward/raw_geo/std": 0.15248848659076392, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970786, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1188.8125, "completions/mean_terminated_length": 1168.0667724609375, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.8699739947989598, "frac_reward_zero_std": 0.0, "grad_norm": 3.274523033067454, "kl": 0.0183868408203125, "learning_rate": 1.4570045434173117e-07, "loss": 0.0285, "num_tokens": 190614210.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9650782942771912, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08407378395488085, "rewards/wordcountpos_reward/raw_geo/std": 0.06107046749174397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1054.3125, "completions/mean_terminated_length": 1054.3125, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.8701740348069614, "frac_reward_zero_std": 0.0, "grad_norm": 3.7591113352357373, "kl": 0.0179443359375, "learning_rate": 1.4556257829591053e-07, "loss": 0.0006, "num_tokens": 190654143.0, "reward": 0.0, "reward_std": 0.6902391910552979, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1198508910139199, "rewards/wordcountpos_reward/raw_geo/std": 0.12896715008256562, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316811, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1176.6875, "completions/mean_terminated_length": 1155.1334228515625, "completions/min_length": 988.0, "completions/min_terminated_length": 988.0, "epoch": 0.870374074814963, "frac_reward_zero_std": 0.0, "grad_norm": 2.3582540863066206, "kl": 0.0108642578125, "learning_rate": 1.4542489945561314e-07, "loss": 0.028, "num_tokens": 190700706.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0281380414962769, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00315918634685772, "rewards/wordcountpos_reward/raw_geo/std": 0.0582165556618112, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08153617692869926, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1214.0, "completions/max_terminated_length": 1214.0, "completions/mean_length": 910.5625, "completions/mean_terminated_length": 910.5625, "completions/min_length": 591.0, "completions/min_terminated_length": 591.0, "epoch": 0.8705741148229645, "frac_reward_zero_std": 0.0, "grad_norm": 3.0635519978721604, "kl": 0.0142059326171875, "learning_rate": 1.4528741788797185e-07, "loss": -0.0172, "num_tokens": 190730427.0, "reward": 0.0, "reward_std": 0.6807907819747925, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.049954743904485295, "rewards/wordcountpos_reward/raw_geo/std": 0.06916522198331897, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1252.75, "completions/mean_terminated_length": 1252.75, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.8707741548309662, "frac_reward_zero_std": 0.0, "grad_norm": 3.1097395725859975, "kl": 0.018768310546875, "learning_rate": 1.4515013366002348e-07, "loss": 0.0588, "num_tokens": 190779447.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0052345991134644, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00548685224578575, "rewards/wordcountpos_reward/raw_geo/std": 0.35214261159611526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1271.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1115.1875, "completions/mean_terminated_length": 1115.1875, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.8709741948389678, "frac_reward_zero_std": 0.0, "grad_norm": 2.9963816754536627, "kl": 0.01617431640625, "learning_rate": 1.4501304683870824e-07, "loss": 0.0049, "num_tokens": 190822210.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9212446808815002, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05233615750476808, "rewards/wordcountpos_reward/raw_geo/std": 0.11598390506856361, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1157.4375, "completions/mean_terminated_length": 1134.60009765625, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.8711742348469694, "frac_reward_zero_std": 0.0, "grad_norm": 3.132969965975199, "kl": 0.01519775390625, "learning_rate": 1.4487615749087044e-07, "loss": -0.0079, "num_tokens": 190865337.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9065794348716736, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07077217504529719, "rewards/wordcountpos_reward/raw_geo/std": 0.1259816266047361, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.23077967687658127, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1020.1875, "completions/mean_terminated_length": 1020.1875, "completions/min_length": 845.0, "completions/min_terminated_length": 845.0, "epoch": 0.871374274854971, "frac_reward_zero_std": 0.0, "grad_norm": 2.9631363909287174, "kl": 0.01428985595703125, "learning_rate": 1.4473946568325767e-07, "loss": -0.0093, "num_tokens": 190915060.0, "reward": 0.0, "reward_std": 0.7938600778579712, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09634899525075527, "rewards/wordcountpos_reward/raw_geo/std": 0.06945637931940594, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1223.4375, "completions/mean_terminated_length": 1183.9285888671875, "completions/min_length": 986.0, "completions/min_terminated_length": 986.0, "epoch": 0.8715743148629725, "frac_reward_zero_std": 0.0, "grad_norm": 2.894341590002559, "kl": 0.01287841796875, "learning_rate": 1.4460297148252197e-07, "loss": 0.016, "num_tokens": 190952579.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7415887713432312, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2766450495769817, "rewards/wordcountpos_reward/raw_geo/std": 0.23142881394885145, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1265.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1071.625, "completions/mean_terminated_length": 1071.625, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.8717743548709742, "frac_reward_zero_std": 0.0, "grad_norm": 2.9116232346072697, "kl": 0.012603759765625, "learning_rate": 1.4446667495521806e-07, "loss": -0.0355, "num_tokens": 190991389.0, "reward": 0.0, "reward_std": 0.8035182356834412, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05176847291845004, "rewards/wordcountpos_reward/raw_geo/std": 0.06944675570773717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1177.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 1007.875, "completions/mean_terminated_length": 1007.875, "completions/min_length": 821.0, "completions/min_terminated_length": 821.0, "epoch": 0.8719743948789758, "frac_reward_zero_std": 0.0, "grad_norm": 3.537367407692665, "kl": 0.0178070068359375, "learning_rate": 1.4433057616780498e-07, "loss": -0.0315, "num_tokens": 191036395.0, "reward": -2.9802322387695312e-08, "reward_std": 0.861401379108429, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01391854602654815, "rewards/wordcountpos_reward/raw_geo/std": 0.2144443993686302, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059472994182545036, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1128.5625, "completions/mean_terminated_length": 1128.5625, "completions/min_length": 810.0, "completions/min_terminated_length": 810.0, "epoch": 0.8721744348869774, "frac_reward_zero_std": 0.0, "grad_norm": 3.4404904633421864, "kl": 0.018157958984375, "learning_rate": 1.44194675186645e-07, "loss": 0.0256, "num_tokens": 191074492.0, "reward": 7.450580596923828e-09, "reward_std": 0.9591312408447266, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.04429827925827902, "rewards/wordcountpos_reward/raw_geo/std": 0.0674378145452476, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1119.0625, "completions/mean_terminated_length": 1119.0625, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.872374474894979, "frac_reward_zero_std": 0.0, "grad_norm": 2.9584865774906093, "kl": 0.01751708984375, "learning_rate": 1.4405897207800428e-07, "loss": -0.0235, "num_tokens": 191107693.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9132064580917358, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0758412030712857, "rewards/wordcountpos_reward/raw_geo/std": 0.14546853003055843, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1061.875, "completions/mean_terminated_length": 1061.875, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.8725745149029805, "frac_reward_zero_std": 0.0, "grad_norm": 3.2312582663973566, "kl": 0.0170440673828125, "learning_rate": 1.4392346690805202e-07, "loss": -0.0242, "num_tokens": 191148403.0, "reward": 0.0, "reward_std": 0.9089673757553101, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.025035618821970522, "rewards/wordcountpos_reward/raw_geo/std": 0.06950712799065661, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1058.0, "completions/mean_length": 1184.25, "completions/mean_terminated_length": 868.5, "completions/min_length": 756.0, "completions/min_terminated_length": 756.0, "epoch": 0.8727745549109822, "frac_reward_zero_std": 0.0, "grad_norm": 2.9963017404406598, "kl": 0.016082763671875, "learning_rate": 1.437881597428612e-07, "loss": -0.0246, "num_tokens": 191193095.0, "reward": 0.0, "reward_std": 0.9467294216156006, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.018253597694272476, "rewards/wordcountpos_reward/raw_geo/std": 0.08870772466751199, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1175.9375, "completions/mean_terminated_length": 1175.9375, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.8729745949189838, "frac_reward_zero_std": 0.0, "grad_norm": 2.95118612109172, "kl": 0.0163726806640625, "learning_rate": 1.4365305064840827e-07, "loss": -0.0157, "num_tokens": 191238150.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9373791813850403, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.24259801324430944, "rewards/wordcountpos_reward/raw_geo/std": 0.08769512618066144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1236.625, "completions/mean_terminated_length": 1236.625, "completions/min_length": 1030.0, "completions/min_terminated_length": 1030.0, "epoch": 0.8731746349269854, "frac_reward_zero_std": 0.0, "grad_norm": 3.2930139908103335, "kl": 0.0156707763671875, "learning_rate": 1.4351813969057313e-07, "loss": 0.022, "num_tokens": 191278744.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9716408848762512, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0032247004615023897, "rewards/wordcountpos_reward/raw_geo/std": 0.0579870789670252, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11603000888978234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1492.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1313.125, "completions/mean_terminated_length": 1313.125, "completions/min_length": 1144.0, "completions/min_terminated_length": 1144.0, "epoch": 0.873374674934987, "frac_reward_zero_std": 0.0, "grad_norm": 2.1750314639518753, "kl": 0.0123291015625, "learning_rate": 1.4338342693513876e-07, "loss": 0.0187, "num_tokens": 191326082.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6658838987350464, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04619778275903359, "rewards/wordcountpos_reward/raw_geo/std": 0.23590083803737866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 1142.3125, "completions/mean_terminated_length": 1142.3125, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.8735747149429886, "frac_reward_zero_std": 0.0, "grad_norm": 3.4105426883130954, "kl": 0.019195556640625, "learning_rate": 1.432489124477919e-07, "loss": -0.0061, "num_tokens": 191369559.0, "reward": 0.0, "reward_std": 0.5732342004776001, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11409180517541649, "rewards/wordcountpos_reward/raw_geo/std": 0.12110076371915554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1109.5, "completions/mean_terminated_length": 1109.5, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.8737747549509902, "frac_reward_zero_std": 0.0, "grad_norm": 3.304196040340059, "kl": 0.01727294921875, "learning_rate": 1.431145962941223e-07, "loss": 0.0654, "num_tokens": 191422591.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9269282221794128, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16584058819243805, "rewards/wordcountpos_reward/raw_geo/std": 0.09578323890510171, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 931.0625, "completions/mean_terminated_length": 931.0625, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.8739747949589918, "frac_reward_zero_std": 0.0, "grad_norm": 3.393138517682367, "kl": 0.0179443359375, "learning_rate": 1.429804785396233e-07, "loss": 0.0211, "num_tokens": 191463432.0, "reward": 0.0, "reward_std": 0.6731550693511963, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06374547349615364, "rewards/wordcountpos_reward/raw_geo/std": 0.03769204251486648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1213.0, "completions/max_terminated_length": 1213.0, "completions/mean_length": 957.4375, "completions/mean_terminated_length": 957.4375, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.8741748349669934, "frac_reward_zero_std": 0.0, "grad_norm": 3.215939764191142, "kl": 0.0167236328125, "learning_rate": 1.4284655924969102e-07, "loss": 0.0351, "num_tokens": 191500319.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9852129817008972, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11907816748578892, "rewards/wordcountpos_reward/raw_geo/std": 0.1467407366154375, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 1047.4375, "completions/mean_terminated_length": 982.7857666015625, "completions/min_length": 757.0, "completions/min_terminated_length": 757.0, "epoch": 0.874374874974995, "frac_reward_zero_std": 0.0, "grad_norm": 3.027015204053927, "kl": 0.01187896728515625, "learning_rate": 1.4271283848962537e-07, "loss": -0.1022, "num_tokens": 191539734.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0176002979278564, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.023857344341740874, "rewards/wordcountpos_reward/raw_geo/std": 0.03246242588512726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10183501544346313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1265.1875, "completions/mean_terminated_length": 1186.916748046875, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.8745749149829966, "frac_reward_zero_std": 0.0, "grad_norm": 3.2143944844552506, "kl": 0.0189208984375, "learning_rate": 1.425793163246291e-07, "loss": 0.0317, "num_tokens": 191590353.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7797218561172485, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.050823878583651644, "rewards/wordcountpos_reward/raw_geo/std": 0.061224306203645516, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1346.0, "completions/mean_length": 1282.4375, "completions/mean_terminated_length": 1113.2222900390625, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.8747749549909982, "frac_reward_zero_std": 0.0, "grad_norm": 3.447443863795195, "kl": 0.0166015625, "learning_rate": 1.424459928198083e-07, "loss": 0.0226, "num_tokens": 191637520.0, "reward": -7.450580596923828e-09, "reward_std": 1.0346715450286865, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.03458244301124338, "rewards/wordcountpos_reward/raw_geo/std": 0.055222246784887255, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1029203215725281, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1193.125, "completions/mean_terminated_length": 1193.125, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.8749749949989998, "frac_reward_zero_std": 0.0, "grad_norm": 2.974632716181278, "kl": 0.0150909423828125, "learning_rate": 1.4231286804017195e-07, "loss": -0.0173, "num_tokens": 191682850.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0149688720703125, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09853333050958286, "rewards/wordcountpos_reward/raw_geo/std": 0.23166457154860054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1118.9375, "completions/mean_terminated_length": 1118.9375, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.8751750350070014, "frac_reward_zero_std": 0.0, "grad_norm": 3.227446985730866, "kl": 0.021026611328125, "learning_rate": 1.421799420506326e-07, "loss": 0.0246, "num_tokens": 191725065.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6124978065490723, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.022135651438745345, "rewards/wordcountpos_reward/raw_geo/std": 0.17140618413603248, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1122.9375, "completions/mean_terminated_length": 1122.9375, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.875375075015003, "frac_reward_zero_std": 0.0, "grad_norm": 2.4928429186626575, "kl": 0.014923095703125, "learning_rate": 1.420472149160052e-07, "loss": -0.0412, "num_tokens": 191775952.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6131945848464966, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.043397888048085274, "rewards/wordcountpos_reward/raw_geo/std": 0.10889998635186407, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1067187372905475, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 956.5625, "completions/mean_terminated_length": 956.5625, "completions/min_length": 593.0, "completions/min_terminated_length": 593.0, "epoch": 0.8755751150230046, "frac_reward_zero_std": 0.0, "grad_norm": 3.785702214820776, "kl": 0.0206298828125, "learning_rate": 1.4191468670100847e-07, "loss": -0.0228, "num_tokens": 191817849.0, "reward": -1.4901161193847656e-08, "reward_std": 0.98225998878479, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02377135866119341, "rewards/wordcountpos_reward/raw_geo/std": 0.11844085798020774, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1210.0, "completions/max_terminated_length": 1210.0, "completions/mean_length": 1001.6875, "completions/mean_terminated_length": 1001.6875, "completions/min_length": 712.0, "completions/min_terminated_length": 712.0, "epoch": 0.8757751550310062, "frac_reward_zero_std": 0.0, "grad_norm": 3.952546131814619, "kl": 0.02276611328125, "learning_rate": 1.4178235747026334e-07, "loss": 0.0284, "num_tokens": 191869004.0, "reward": -3.725290298461914e-09, "reward_std": 1.0167732238769531, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1354330388568613, "rewards/wordcountpos_reward/raw_geo/std": 0.1777113963361251, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1197.0, "completions/max_terminated_length": 1197.0, "completions/mean_length": 1089.0, "completions/mean_terminated_length": 1089.0, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.8759751950390078, "frac_reward_zero_std": 0.0, "grad_norm": 3.0822916118930097, "kl": 0.0250091552734375, "learning_rate": 1.4165022728829463e-07, "loss": -0.0092, "num_tokens": 191917708.0, "reward": -5.960464477539063e-08, "reward_std": 0.809794545173645, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02074302627676576, "rewards/wordcountpos_reward/raw_geo/std": 0.3598622693806316, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1187.625, "completions/mean_terminated_length": 1187.625, "completions/min_length": 736.0, "completions/min_terminated_length": 736.0, "epoch": 0.8761752350470094, "frac_reward_zero_std": 0.0, "grad_norm": 2.8848662534214156, "kl": 0.0269775390625, "learning_rate": 1.415182962195292e-07, "loss": -0.0295, "num_tokens": 191963302.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0395618677139282, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020807511227868086, "rewards/wordcountpos_reward/raw_geo/std": 0.0484820477105618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 997.6875, "completions/mean_terminated_length": 964.2000732421875, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.876375275055011, "frac_reward_zero_std": 0.0, "grad_norm": 3.2285785753555705, "kl": 0.01922607421875, "learning_rate": 1.4138656432829742e-07, "loss": -0.086, "num_tokens": 192006889.0, "reward": 0.0, "reward_std": 0.9186073541641235, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3644923097201783, "rewards/wordcountpos_reward/raw_geo/std": 0.09931392711965359, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362766, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1259.0, "completions/max_terminated_length": 1259.0, "completions/mean_length": 1100.5625, "completions/mean_terminated_length": 1100.5625, "completions/min_length": 759.0, "completions/min_terminated_length": 759.0, "epoch": 0.8765753150630126, "frac_reward_zero_std": 0.0, "grad_norm": 2.759913143947702, "kl": 0.0124359130859375, "learning_rate": 1.412550316788324e-07, "loss": 0.0172, "num_tokens": 192051242.0, "reward": 0.0, "reward_std": 0.6878140568733215, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10654879986957613, "rewards/wordcountpos_reward/raw_geo/std": 0.10352174391702665, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1210.0, "completions/max_terminated_length": 1210.0, "completions/mean_length": 1059.125, "completions/mean_terminated_length": 1059.125, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.8767753550710142, "frac_reward_zero_std": 0.0, "grad_norm": 3.1501041562523464, "kl": 0.012420654296875, "learning_rate": 1.4112369833527007e-07, "loss": 0.0065, "num_tokens": 192080628.0, "reward": 0.0, "reward_std": 0.24408167600631714, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02179489208892455, "rewards/wordcountpos_reward/raw_geo/std": 0.06807171156411353, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03442651863295481, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1223.0, "completions/max_terminated_length": 1223.0, "completions/mean_length": 965.4375, "completions/mean_terminated_length": 965.4375, "completions/min_length": 573.0, "completions/min_terminated_length": 573.0, "epoch": 0.8769753950790158, "frac_reward_zero_std": 0.0, "grad_norm": 2.697139369372952, "kl": 0.01812744140625, "learning_rate": 1.4099256436164898e-07, "loss": -0.0389, "num_tokens": 192108555.0, "reward": 0.0, "reward_std": 0.8471590280532837, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0006313482423415812, "rewards/wordcountpos_reward/raw_geo/std": 0.11964207711239291, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1263.0625, "completions/mean_terminated_length": 1247.2667236328125, "completions/min_length": 1059.0, "completions/min_terminated_length": 1059.0, "epoch": 0.8771754350870175, "frac_reward_zero_std": 0.0, "grad_norm": 3.1394842719909066, "kl": 0.014739990234375, "learning_rate": 1.4086162982191084e-07, "loss": -0.0066, "num_tokens": 192157124.0, "reward": -5.960464477539063e-08, "reward_std": 0.830713152885437, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.058940695217115625, "rewards/wordcountpos_reward/raw_geo/std": 0.11669396347398682, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242308, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1043.875, "completions/mean_terminated_length": 1043.875, "completions/min_length": 762.0, "completions/min_terminated_length": 762.0, "epoch": 0.877375475095019, "frac_reward_zero_std": 0.0, "grad_norm": 2.958140259143717, "kl": 0.01275634765625, "learning_rate": 1.4073089477989982e-07, "loss": 0.0132, "num_tokens": 192194346.0, "reward": 7.450580596923828e-09, "reward_std": 1.0295838117599487, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0513069254258069, "rewards/wordcountpos_reward/raw_geo/std": 0.08924779616427539, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10576003586036262, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1203.875, "completions/mean_terminated_length": 1105.166748046875, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.8775755151030206, "frac_reward_zero_std": 0.0, "grad_norm": 3.326283335848928, "kl": 0.0181732177734375, "learning_rate": 1.40600359299363e-07, "loss": 0.0132, "num_tokens": 192241440.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5622525215148926, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.31001554068493586, "rewards/wordcountpos_reward/raw_geo/std": 0.37101340180794784, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1147.625, "completions/mean_terminated_length": 1030.166748046875, "completions/min_length": 700.0, "completions/min_terminated_length": 700.0, "epoch": 0.8777755551110222, "frac_reward_zero_std": 0.0, "grad_norm": 3.4950320679168123, "kl": 0.0220947265625, "learning_rate": 1.4047002344395005e-07, "loss": 0.0432, "num_tokens": 192289970.0, "reward": 5.21540641784668e-08, "reward_std": 1.0253639221191406, "rewards/wordcountpos_reward/mean": 5.21540641784668e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05888235126661496, "rewards/wordcountpos_reward/raw_geo/std": 0.12554861853314708, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852978, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1205.625, "completions/mean_terminated_length": 1186.0001220703125, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.8779755951190238, "frac_reward_zero_std": 0.0, "grad_norm": 3.381509119932007, "kl": 0.0170135498046875, "learning_rate": 1.403398872772132e-07, "loss": 0.0018, "num_tokens": 192336876.0, "reward": 0.0, "reward_std": 0.8960031867027283, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09979794509105701, "rewards/wordcountpos_reward/raw_geo/std": 0.049262244707280044, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194862, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1166.0, "completions/max_terminated_length": 1166.0, "completions/mean_length": 967.875, "completions/mean_terminated_length": 967.875, "completions/min_length": 722.0, "completions/min_terminated_length": 722.0, "epoch": 0.8781756351270255, "frac_reward_zero_std": 0.0, "grad_norm": 3.0392843675796497, "kl": 0.01708984375, "learning_rate": 1.402099508626076e-07, "loss": -0.0066, "num_tokens": 192374802.0, "reward": -2.9802322387695312e-08, "reward_std": 0.88458251953125, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06767104593310361, "rewards/wordcountpos_reward/raw_geo/std": 0.11631750704315455, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1321.4375, "completions/mean_terminated_length": 1142.875, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.878375675135027, "frac_reward_zero_std": 0.0, "grad_norm": 3.072775088161374, "kl": 0.026458740234375, "learning_rate": 1.4008021426349083e-07, "loss": -0.0255, "num_tokens": 192431593.0, "reward": 0.0, "reward_std": 0.9251573085784912, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08385772127496663, "rewards/wordcountpos_reward/raw_geo/std": 0.22320582044262954, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262934, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1229.6875, "completions/mean_terminated_length": 1229.6875, "completions/min_length": 1003.0, "completions/min_terminated_length": 1003.0, "epoch": 0.8785757151430286, "frac_reward_zero_std": 0.0, "grad_norm": 2.8128632184022626, "kl": 0.0164947509765625, "learning_rate": 1.399506775431229e-07, "loss": -0.0024, "num_tokens": 192477124.0, "reward": 7.450580596923828e-09, "reward_std": 1.0504132509231567, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.07131097195576795, "rewards/wordcountpos_reward/raw_geo/std": 0.14359339015727549, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.7791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746357, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 1185.75, "completions/mean_terminated_length": 1042.9091796875, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.8787757551510302, "frac_reward_zero_std": 0.0, "grad_norm": 3.219428039083163, "kl": 0.016815185546875, "learning_rate": 1.3982134076466662e-07, "loss": -0.005, "num_tokens": 192522696.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9467059373855591, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05059818633155676, "rewards/wordcountpos_reward/raw_geo/std": 0.07239452967295994, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09067647005823629, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1441.0, "completions/mean_terminated_length": 1342.666748046875, "completions/min_length": 1232.0, "completions/min_terminated_length": 1232.0, "epoch": 0.8789757951590318, "frac_reward_zero_std": 0.0, "grad_norm": 2.6088326867093516, "kl": 0.014190673828125, "learning_rate": 1.396922039911872e-07, "loss": -0.0077, "num_tokens": 192569192.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9401513934135437, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0822643738860213, "rewards/wordcountpos_reward/raw_geo/std": 0.13599368521633384, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14168300559373403, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 963.375, "completions/mean_terminated_length": 963.375, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.8791758351670335, "frac_reward_zero_std": 0.0, "grad_norm": 3.5870533131772993, "kl": 0.020843505859375, "learning_rate": 1.395632672856523e-07, "loss": -0.0184, "num_tokens": 192609222.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9910258054733276, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08372680294056661, "rewards/wordcountpos_reward/raw_geo/std": 0.07264104792248825, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1061.625, "completions/mean_terminated_length": 1032.4000244140625, "completions/min_length": 755.0, "completions/min_terminated_length": 755.0, "epoch": 0.879375875175035, "frac_reward_zero_std": 0.0, "grad_norm": 3.445572565047351, "kl": 0.0176239013671875, "learning_rate": 1.3943453071093187e-07, "loss": 0.0259, "num_tokens": 192638632.0, "reward": 1.30385160446167e-08, "reward_std": 1.0348440408706665, "rewards/wordcountpos_reward/mean": 1.30385160446167e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04877002648384185, "rewards/wordcountpos_reward/raw_geo/std": 0.0790339269951756, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1168.6875, "completions/mean_terminated_length": 1018.0909423828125, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.8795759151830366, "frac_reward_zero_std": 0.0, "grad_norm": 3.1296110131976915, "kl": 0.0159759521484375, "learning_rate": 1.393059943297988e-07, "loss": -0.0299, "num_tokens": 192683691.0, "reward": 0.0, "reward_std": 1.0557456016540527, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02717535539593742, "rewards/wordcountpos_reward/raw_geo/std": 0.05602995868746391, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 842.75, "completions/mean_terminated_length": 842.75, "completions/min_length": 545.0, "completions/min_terminated_length": 545.0, "epoch": 0.8797759551910382, "frac_reward_zero_std": 0.0, "grad_norm": 3.7715718272248124, "kl": 0.0161285400390625, "learning_rate": 1.3917765820492766e-07, "loss": 0.0115, "num_tokens": 192722815.0, "reward": 0.0, "reward_std": 0.7589221000671387, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.35653820830101124, "rewards/wordcountpos_reward/raw_geo/std": 0.24960935016912203, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039006, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1343.625, "completions/mean_terminated_length": 1249.800048828125, "completions/min_length": 1102.0, "completions/min_terminated_length": 1102.0, "epoch": 0.8799759951990398, "frac_reward_zero_std": 0.0, "grad_norm": 2.8145288320652746, "kl": 0.01470947265625, "learning_rate": 1.3904952239889606e-07, "loss": -0.0372, "num_tokens": 192770729.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0502079725265503, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03672643951983323, "rewards/wordcountpos_reward/raw_geo/std": 0.05107175232059033, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1104.0, "completions/mean_terminated_length": 1104.0, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.8801760352070415, "frac_reward_zero_std": 0.0, "grad_norm": 3.257893558223202, "kl": 0.01715087890625, "learning_rate": 1.3892158697418325e-07, "loss": -0.0098, "num_tokens": 192807521.0, "reward": 1.1175870895385742e-08, "reward_std": 0.9929890632629395, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06951966182408573, "rewards/wordcountpos_reward/raw_geo/std": 0.0792573821433912, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1390.0, "completions/max_terminated_length": 1390.0, "completions/mean_length": 1146.25, "completions/mean_terminated_length": 1146.25, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.880376075215043, "frac_reward_zero_std": 0.0, "grad_norm": 3.4674308289412816, "kl": 0.018798828125, "learning_rate": 1.387938519931714e-07, "loss": -0.0058, "num_tokens": 192846485.0, "reward": -5.960464477539063e-08, "reward_std": 0.8862185478210449, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16702595794106181, "rewards/wordcountpos_reward/raw_geo/std": 0.22114943863761471, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1289.0, "completions/mean_length": 1083.125, "completions/mean_terminated_length": 1055.3333740234375, "completions/min_length": 730.0, "completions/min_terminated_length": 730.0, "epoch": 0.8805761152230446, "frac_reward_zero_std": 0.0, "grad_norm": 3.2994835487468013, "kl": 0.0180511474609375, "learning_rate": 1.3866631751814452e-07, "loss": -0.0282, "num_tokens": 192895135.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0642927885055542, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10797320727162286, "rewards/wordcountpos_reward/raw_geo/std": 0.35508785786327296, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327551, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1140.3125, "completions/mean_terminated_length": 1140.3125, "completions/min_length": 846.0, "completions/min_terminated_length": 846.0, "epoch": 0.8807761552310462, "frac_reward_zero_std": 0.0, "grad_norm": 2.7764076587453186, "kl": 0.0185394287109375, "learning_rate": 1.385389836112891e-07, "loss": 0.0258, "num_tokens": 192930884.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5041552782058716, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.033654080561720784, "rewards/wordcountpos_reward/raw_geo/std": 0.059790775298433685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 1181.8125, "completions/mean_terminated_length": 1181.8125, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.8809761952390478, "frac_reward_zero_std": 0.0, "grad_norm": 3.0187862931646756, "kl": 0.0167236328125, "learning_rate": 1.384118503346937e-07, "loss": -0.0116, "num_tokens": 192975209.0, "reward": 0.0, "reward_std": 0.5364234447479248, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07103726423419769, "rewards/wordcountpos_reward/raw_geo/std": 0.1640838389964608, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1150.0625, "completions/mean_terminated_length": 1150.0625, "completions/min_length": 982.0, "completions/min_terminated_length": 982.0, "epoch": 0.8811762352470494, "frac_reward_zero_std": 0.0, "grad_norm": 1.9231650933799451, "kl": 0.0079193115234375, "learning_rate": 1.3828491775034897e-07, "loss": -0.0177, "num_tokens": 193008002.0, "reward": -7.450580596923828e-09, "reward_std": 1.0516908168792725, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.034153114505134446, "rewards/wordcountpos_reward/raw_geo/std": 0.07500724100145259, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820636, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 1104.1875, "completions/mean_terminated_length": 1077.800048828125, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.881376275255051, "frac_reward_zero_std": 0.0, "grad_norm": 3.4482584334420405, "kl": 0.016876220703125, "learning_rate": 1.381581859201479e-07, "loss": -0.0037, "num_tokens": 193047637.0, "reward": -4.470348358154297e-08, "reward_std": 1.065464973449707, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00847180943174565, "rewards/wordcountpos_reward/raw_geo/std": 0.18606700263785347, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 1021.9375, "completions/mean_terminated_length": 1021.9375, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.8815763152630526, "frac_reward_zero_std": 0.0, "grad_norm": 2.7738956748117536, "kl": 0.01194000244140625, "learning_rate": 1.3803165490588553e-07, "loss": 0.0154, "num_tokens": 193094676.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8831729888916016, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0034139852295493114, "rewards/wordcountpos_reward/raw_geo/std": 0.06441598320080169, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 964.125, "completions/mean_terminated_length": 964.125, "completions/min_length": 620.0, "completions/min_terminated_length": 620.0, "epoch": 0.8817763552710542, "frac_reward_zero_std": 0.0, "grad_norm": 4.000628070981809, "kl": 0.0177764892578125, "learning_rate": 1.3790532476925902e-07, "loss": -0.0375, "num_tokens": 193124734.0, "reward": -1.4901161193847656e-08, "reward_std": 1.001609444618225, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14252184808822985, "rewards/wordcountpos_reward/raw_geo/std": 0.16263570322895668, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460883, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 1234.5625, "completions/mean_terminated_length": 1234.5625, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.8819763952790558, "frac_reward_zero_std": 0.0, "grad_norm": 3.2185796694421893, "kl": 0.01873779296875, "learning_rate": 1.3777919557186736e-07, "loss": -0.0121, "num_tokens": 193169879.0, "reward": 1.7695128917694092e-08, "reward_std": 0.8789088129997253, "rewards/wordcountpos_reward/mean": 1.7695128917694092e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.013510433330344938, "rewards/wordcountpos_reward/raw_geo/std": 0.12142203182751281, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 874.3125, "completions/mean_terminated_length": 874.3125, "completions/min_length": 486.0, "completions/min_terminated_length": 486.0, "epoch": 0.8821764352870574, "frac_reward_zero_std": 0.0, "grad_norm": 3.887938782350559, "kl": 0.017578125, "learning_rate": 1.3765326737521178e-07, "loss": -0.0275, "num_tokens": 193206468.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9418660402297974, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07028585617209365, "rewards/wordcountpos_reward/raw_geo/std": 0.07727096266120699, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1037.0, "completions/max_terminated_length": 1037.0, "completions/mean_length": 905.9375, "completions/mean_terminated_length": 905.9375, "completions/min_length": 704.0, "completions/min_terminated_length": 704.0, "epoch": 0.882376475295059, "frac_reward_zero_std": 0.0, "grad_norm": 3.084179740281046, "kl": 0.01629638671875, "learning_rate": 1.3752754024069546e-07, "loss": -0.0264, "num_tokens": 193238003.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0435287952423096, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10881013452709418, "rewards/wordcountpos_reward/raw_geo/std": 0.06229496851082211, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078612, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1080.875, "completions/mean_terminated_length": 1080.875, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.8825765153030606, "frac_reward_zero_std": 0.0, "grad_norm": 3.2121229798304363, "kl": 0.0158233642578125, "learning_rate": 1.3740201422962362e-07, "loss": -0.0655, "num_tokens": 193281769.0, "reward": -7.450580596923828e-09, "reward_std": 1.0427420139312744, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.2023437697023969, "rewards/wordcountpos_reward/raw_geo/std": 0.12016241584967799, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1136.0, "completions/max_terminated_length": 1136.0, "completions/mean_length": 981.0, "completions/mean_terminated_length": 981.0, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.8827765553110622, "frac_reward_zero_std": 0.0, "grad_norm": 3.0340812168356126, "kl": 0.0159149169921875, "learning_rate": 1.372766894032031e-07, "loss": -0.0242, "num_tokens": 193319041.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0675495862960815, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13681092103814596, "rewards/wordcountpos_reward/raw_geo/std": 0.1164081400875781, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1137.9375, "completions/mean_terminated_length": 1137.9375, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.8829765953190638, "frac_reward_zero_std": 0.0, "grad_norm": 2.5951125661465073, "kl": 0.0121917724609375, "learning_rate": 1.3715156582254294e-07, "loss": -0.0252, "num_tokens": 193352896.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8739527463912964, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07436686402229102, "rewards/wordcountpos_reward/raw_geo/std": 0.10519503221396533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258096, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1100.9375, "completions/mean_terminated_length": 1100.9375, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.8831766353270654, "frac_reward_zero_std": 0.0, "grad_norm": 3.4609701613888446, "kl": 0.0148773193359375, "learning_rate": 1.3702664354865403e-07, "loss": -0.0328, "num_tokens": 193395767.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8285161256790161, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.027444587261148283, "rewards/wordcountpos_reward/raw_geo/std": 0.11533122064652455, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 846.625, "completions/mean_terminated_length": 846.625, "completions/min_length": 397.0, "completions/min_terminated_length": 397.0, "epoch": 0.883376675335067, "frac_reward_zero_std": 0.0, "grad_norm": 3.487987136112229, "kl": 0.0151214599609375, "learning_rate": 1.369019226424491e-07, "loss": -0.057, "num_tokens": 193434769.0, "reward": 0.0, "reward_std": 0.6670342683792114, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.17290893682902506, "rewards/wordcountpos_reward/raw_geo/std": 0.22829617738572172, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1266.875, "completions/mean_terminated_length": 1033.75, "completions/min_length": 764.0, "completions/min_terminated_length": 764.0, "epoch": 0.8835767153430686, "frac_reward_zero_std": 0.0, "grad_norm": 2.5981313510946196, "kl": 0.0135040283203125, "learning_rate": 1.3677740316474236e-07, "loss": -0.0549, "num_tokens": 193474183.0, "reward": -2.9802322387695312e-08, "reward_std": 0.28691038489341736, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1601429192962943, "rewards/wordcountpos_reward/raw_geo/std": 0.2018183950765476, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1352.5, "completions/mean_terminated_length": 1303.3333740234375, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.8837767553510703, "frac_reward_zero_std": 0.0, "grad_norm": 2.859109512304825, "kl": 0.0146636962890625, "learning_rate": 1.3665308517625031e-07, "loss": 0.0018, "num_tokens": 193529567.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8234812021255493, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15054300306537655, "rewards/wordcountpos_reward/raw_geo/std": 0.10249172383375554, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1165.25, "completions/mean_terminated_length": 1165.25, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.8839767953590718, "frac_reward_zero_std": 0.0, "grad_norm": 3.342974620395988, "kl": 0.017547607421875, "learning_rate": 1.365289687375909e-07, "loss": -0.0141, "num_tokens": 193579187.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6375408172607422, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.004362673880252422, "rewards/wordcountpos_reward/raw_geo/std": 0.1395031222741599, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11287488977066928, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1146.625, "completions/mean_terminated_length": 1123.0667724609375, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.8841768353670734, "frac_reward_zero_std": 0.0, "grad_norm": 2.6363535719566142, "kl": 0.01165771484375, "learning_rate": 1.3640505390928393e-07, "loss": -0.0588, "num_tokens": 193620429.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9969998598098755, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05741295438180166, "rewards/wordcountpos_reward/raw_geo/std": 0.07018940638633629, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1131.375, "completions/mean_terminated_length": 1131.375, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.884376875375075, "frac_reward_zero_std": 0.0, "grad_norm": 3.3804267271551742, "kl": 0.01904296875, "learning_rate": 1.362813407517506e-07, "loss": -0.0283, "num_tokens": 193661883.0, "reward": -1.1175870895385742e-08, "reward_std": 0.9577149152755737, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.036556661666297556, "rewards/wordcountpos_reward/raw_geo/std": 0.07739815349807484, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1118.5, "completions/mean_terminated_length": 1118.5, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.8845769153830766, "frac_reward_zero_std": 0.0, "grad_norm": 3.442773030616646, "kl": 0.0181884765625, "learning_rate": 1.3615782932531434e-07, "loss": 0.0176, "num_tokens": 193699787.0, "reward": 9.313225746154785e-09, "reward_std": 1.0403406620025635, "rewards/wordcountpos_reward/mean": 9.313225746154785e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.034191170278412834, "rewards/wordcountpos_reward/raw_geo/std": 0.10893018774436723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1067.1875, "completions/mean_terminated_length": 1067.1875, "completions/min_length": 787.0, "completions/min_terminated_length": 787.0, "epoch": 0.8847769553910783, "frac_reward_zero_std": 0.0, "grad_norm": 3.010244138528641, "kl": 0.017333984375, "learning_rate": 1.3603451969019962e-07, "loss": -0.0351, "num_tokens": 193750710.0, "reward": 0.0, "reward_std": 0.8894560933113098, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05931632557517283, "rewards/wordcountpos_reward/raw_geo/std": 0.20169088321616924, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1001.25, "completions/mean_terminated_length": 1001.25, "completions/min_length": 516.0, "completions/min_terminated_length": 516.0, "epoch": 0.8849769953990798, "frac_reward_zero_std": 0.0, "grad_norm": 3.469959075976723, "kl": 0.0139617919921875, "learning_rate": 1.35911411906533e-07, "loss": -0.0353, "num_tokens": 193785658.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9785954356193542, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05711135918746618, "rewards/wordcountpos_reward/raw_geo/std": 0.15016033333087614, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.775, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 929.0, "completions/mean_terminated_length": 929.0, "completions/min_length": 754.0, "completions/min_terminated_length": 754.0, "epoch": 0.8851770354070814, "frac_reward_zero_std": 0.0, "grad_norm": 2.705624500628476, "kl": 0.0140228271484375, "learning_rate": 1.357885060343421e-07, "loss": -0.0404, "num_tokens": 193821154.0, "reward": 0.0, "reward_std": 0.8306316137313843, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0633758901406006, "rewards/wordcountpos_reward/raw_geo/std": 0.07532773245307908, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1234.125, "completions/mean_terminated_length": 1216.4000244140625, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.885377075415083, "frac_reward_zero_std": 0.0, "grad_norm": 2.9677459186951975, "kl": 0.0131072998046875, "learning_rate": 1.3566580213355673e-07, "loss": 0.0178, "num_tokens": 193874628.0, "reward": 0.0, "reward_std": 0.4981011748313904, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07068749817707715, "rewards/wordcountpos_reward/raw_geo/std": 0.23672255269090875, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1102.0, "completions/mean_terminated_length": 1010.1538696289062, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.8855771154230846, "frac_reward_zero_std": 0.0, "grad_norm": 3.6281947044665386, "kl": 0.022735595703125, "learning_rate": 1.3554330026400753e-07, "loss": -0.0009, "num_tokens": 193922716.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0278894901275635, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3587083815362155, "rewards/wordcountpos_reward/raw_geo/std": 0.11692284573984356, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16233253479155635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 1147.5625, "completions/mean_terminated_length": 1147.5625, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.8857771554310863, "frac_reward_zero_std": 0.0, "grad_norm": 3.3309518789716543, "kl": 0.020263671875, "learning_rate": 1.3542100048542723e-07, "loss": -0.0256, "num_tokens": 193960133.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0333671569824219, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024630526453481565, "rewards/wordcountpos_reward/raw_geo/std": 0.06081504438409499, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1198.875, "completions/mean_terminated_length": 1129.3846435546875, "completions/min_length": 778.0, "completions/min_terminated_length": 778.0, "epoch": 0.8859771954390878, "frac_reward_zero_std": 0.0, "grad_norm": 3.4671460367726645, "kl": 0.016845703125, "learning_rate": 1.352989028574496e-07, "loss": 0.0151, "num_tokens": 194007371.0, "reward": -5.960464477539063e-08, "reward_std": 0.8523173332214355, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.22305261173546714, "rewards/wordcountpos_reward/raw_geo/std": 0.28948987087397876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115676, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1233.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 1038.0625, "completions/mean_terminated_length": 1038.0625, "completions/min_length": 760.0, "completions/min_terminated_length": 760.0, "epoch": 0.8861772354470894, "frac_reward_zero_std": 0.0, "grad_norm": 3.1667490302730044, "kl": 0.017486572265625, "learning_rate": 1.3517700743961017e-07, "loss": 0.0156, "num_tokens": 194049740.0, "reward": 2.2351741790771484e-08, "reward_std": 1.0589730739593506, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01765106678825723, "rewards/wordcountpos_reward/raw_geo/std": 0.03410941818139318, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1333.0, "completions/mean_length": 1156.8125, "completions/mean_terminated_length": 1133.933349609375, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.886377275455091, "frac_reward_zero_std": 0.0, "grad_norm": 3.481604782331785, "kl": 0.01806640625, "learning_rate": 1.3505531429134541e-07, "loss": -0.0226, "num_tokens": 194093417.0, "reward": -5.960464477539063e-08, "reward_std": 0.4364674687385559, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09365665253527823, "rewards/wordcountpos_reward/raw_geo/std": 0.10404204156138194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1065.8125, "completions/mean_terminated_length": 1065.8125, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.8865773154630926, "frac_reward_zero_std": 0.0, "grad_norm": 3.4934731209723098, "kl": 0.0201416015625, "learning_rate": 1.3493382347199373e-07, "loss": -0.017, "num_tokens": 194129334.0, "reward": 7.450580596923828e-09, "reward_std": 0.9868636131286621, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06859519601260883, "rewards/wordcountpos_reward/raw_geo/std": 0.19741059205527212, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1197.0, "completions/mean_terminated_length": 1197.0, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.8867773554710943, "frac_reward_zero_std": 0.0, "grad_norm": 3.1217899004426117, "kl": 0.0166778564453125, "learning_rate": 1.3481253504079447e-07, "loss": 0.0031, "num_tokens": 194167694.0, "reward": 0.0, "reward_std": 1.0004427433013916, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011121764951765648, "rewards/wordcountpos_reward/raw_geo/std": 0.19568892054840092, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1073.75, "completions/mean_terminated_length": 1045.3333740234375, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.8869773954790958, "frac_reward_zero_std": 0.0, "grad_norm": 3.5895434093643046, "kl": 0.017578125, "learning_rate": 1.3469144905688854e-07, "loss": 0.0139, "num_tokens": 194217842.0, "reward": 0.0, "reward_std": 0.9575405120849609, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13027743175121614, "rewards/wordcountpos_reward/raw_geo/std": 0.13911918512255136, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1274.75, "completions/mean_terminated_length": 1259.7333984375, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.8871774354870974, "frac_reward_zero_std": 0.0, "grad_norm": 2.793501697575995, "kl": 0.0121307373046875, "learning_rate": 1.3457056557931784e-07, "loss": 0.0033, "num_tokens": 194253758.0, "reward": 1.4901161193847656e-08, "reward_std": 1.011135458946228, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.058604462439358365, "rewards/wordcountpos_reward/raw_geo/std": 0.07423518273386664, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1369.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1099.8125, "completions/mean_terminated_length": 1099.8125, "completions/min_length": 872.0, "completions/min_terminated_length": 872.0, "epoch": 0.887377475495099, "frac_reward_zero_std": 0.0, "grad_norm": 3.128653187462499, "kl": 0.011993408203125, "learning_rate": 1.344498846670258e-07, "loss": -0.0065, "num_tokens": 194295875.0, "reward": 0.0, "reward_std": 0.433698832988739, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0031882669823845664, "rewards/wordcountpos_reward/raw_geo/std": 0.12995136329542176, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1128748897706693, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1078.5625, "completions/mean_terminated_length": 1078.5625, "completions/min_length": 623.0, "completions/min_terminated_length": 623.0, "epoch": 0.8875775155031006, "frac_reward_zero_std": 0.0, "grad_norm": 3.1256132611036596, "kl": 0.01824951171875, "learning_rate": 1.3432940637885695e-07, "loss": -0.046, "num_tokens": 194340404.0, "reward": 0.0, "reward_std": 0.36555489897727966, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03170227316833057, "rewards/wordcountpos_reward/raw_geo/std": 0.10100180323747975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10809803506625448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1389.0, "completions/max_terminated_length": 1389.0, "completions/mean_length": 997.3125, "completions/mean_terminated_length": 997.3125, "completions/min_length": 652.0, "completions/min_terminated_length": 652.0, "epoch": 0.8877775555111023, "frac_reward_zero_std": 0.0, "grad_norm": 3.689082572990748, "kl": 0.022613525390625, "learning_rate": 1.342091307735571e-07, "loss": -0.0215, "num_tokens": 194381321.0, "reward": 0.0, "reward_std": 0.6722843647003174, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011088418459157121, "rewards/wordcountpos_reward/raw_geo/std": 0.09047295119050769, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16233253479155635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1156.25, "completions/mean_terminated_length": 1156.25, "completions/min_length": 913.0, "completions/min_terminated_length": 913.0, "epoch": 0.8879775955191038, "frac_reward_zero_std": 0.0, "grad_norm": 3.23746148358338, "kl": 0.0184783935546875, "learning_rate": 1.3408905790977318e-07, "loss": 0.0056, "num_tokens": 194424029.0, "reward": -2.9802322387695312e-08, "reward_std": 0.701089084148407, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13850968900132032, "rewards/wordcountpos_reward/raw_geo/std": 0.14752808355227648, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1155.375, "completions/mean_terminated_length": 1155.375, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.8881776355271054, "frac_reward_zero_std": 0.0, "grad_norm": 2.7403307659190497, "kl": 0.0143585205078125, "learning_rate": 1.3396918784605319e-07, "loss": -0.0105, "num_tokens": 194462307.0, "reward": 0.0, "reward_std": 1.0208618640899658, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06349758857184529, "rewards/wordcountpos_reward/raw_geo/std": 0.2086734751206101, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1407.8125, "completions/mean_terminated_length": 1377.0833740234375, "completions/min_length": 1111.0, "completions/min_terminated_length": 1111.0, "epoch": 0.888377675535107, "frac_reward_zero_std": 0.0, "grad_norm": 2.644719823679903, "kl": 0.0160675048828125, "learning_rate": 1.338495206408463e-07, "loss": -0.0105, "num_tokens": 194514976.0, "reward": 7.450580596923828e-09, "reward_std": 1.0442841053009033, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.03333336967910696, "rewards/wordcountpos_reward/raw_geo/std": 0.0636844527313895, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 1105.0, "completions/mean_terminated_length": 1105.0, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.8885777155431086, "frac_reward_zero_std": 0.0, "grad_norm": 3.2091643291954175, "kl": 0.019073486328125, "learning_rate": 1.3373005635250302e-07, "loss": -0.0087, "num_tokens": 194564792.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6925967931747437, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11020380376927236, "rewards/wordcountpos_reward/raw_geo/std": 0.13718362943746398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1062.5, "completions/mean_terminated_length": 1062.5, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.8887777555511103, "frac_reward_zero_std": 0.0, "grad_norm": 3.158481707535591, "kl": 0.0148773193359375, "learning_rate": 1.3361079503927429e-07, "loss": -0.0186, "num_tokens": 194599472.0, "reward": 3.725290298461914e-08, "reward_std": 1.0499560832977295, "rewards/wordcountpos_reward/mean": 3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08703712299298141, "rewards/wordcountpos_reward/raw_geo/std": 0.05817840691236685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1219.3125, "completions/mean_terminated_length": 1219.3125, "completions/min_length": 1009.0, "completions/min_terminated_length": 1009.0, "epoch": 0.8889777955591118, "frac_reward_zero_std": 0.0, "grad_norm": 2.8020231267166826, "kl": 0.0162811279296875, "learning_rate": 1.3349173675931285e-07, "loss": -0.0097, "num_tokens": 194643077.0, "reward": 0.0, "reward_std": 0.8324133157730103, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.17514510799681948, "rewards/wordcountpos_reward/raw_geo/std": 0.1789341759596741, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 1128.5, "completions/mean_terminated_length": 1075.4285888671875, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.8891778355671134, "frac_reward_zero_std": 0.0, "grad_norm": 2.8049866350554584, "kl": 0.012908935546875, "learning_rate": 1.333728815706718e-07, "loss": -0.0038, "num_tokens": 194683941.0, "reward": -1.4901161193847656e-08, "reward_std": 0.930994987487793, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18399481832504247, "rewards/wordcountpos_reward/raw_geo/std": 0.04962787545164397, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408155, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1147.6875, "completions/mean_terminated_length": 1147.6875, "completions/min_length": 963.0, "completions/min_terminated_length": 963.0, "epoch": 0.889377875575115, "frac_reward_zero_std": 0.0, "grad_norm": 3.4128989362872115, "kl": 0.01837158203125, "learning_rate": 1.3325422953130564e-07, "loss": 0.0102, "num_tokens": 194741248.0, "reward": 0.0, "reward_std": 0.940065860748291, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08871471885497617, "rewards/wordcountpos_reward/raw_geo/std": 0.1381995575402556, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1175.9375, "completions/mean_terminated_length": 1129.6429443359375, "completions/min_length": 745.0, "completions/min_terminated_length": 745.0, "epoch": 0.8895779155831166, "frac_reward_zero_std": 0.0, "grad_norm": 3.272631260979258, "kl": 0.019500732421875, "learning_rate": 1.3313578069906945e-07, "loss": -0.0158, "num_tokens": 194777575.0, "reward": 0.0, "reward_std": 1.0331639051437378, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10867286535397341, "rewards/wordcountpos_reward/raw_geo/std": 0.12737364180669364, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 1060.0625, "completions/mean_terminated_length": 1060.0625, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.8897779555911183, "frac_reward_zero_std": 0.0, "grad_norm": 3.0269983201363546, "kl": 0.0178070068359375, "learning_rate": 1.3301753513171965e-07, "loss": -0.0411, "num_tokens": 194830024.0, "reward": 0.0, "reward_std": 0.9856574535369873, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1468966430073937, "rewards/wordcountpos_reward/raw_geo/std": 0.18340486807648199, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1344.875, "completions/mean_terminated_length": 1274.3636474609375, "completions/min_length": 970.0, "completions/min_terminated_length": 970.0, "epoch": 0.8899779955991198, "frac_reward_zero_std": 0.0, "grad_norm": 2.9192949870329317, "kl": 0.0158843994140625, "learning_rate": 1.3289949288691313e-07, "loss": -0.0407, "num_tokens": 194882942.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9126633405685425, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2260199541709977, "rewards/wordcountpos_reward/raw_geo/std": 0.25707155873129295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804347, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1350.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1022.5625, "completions/mean_terminated_length": 1022.5625, "completions/min_length": 720.0, "completions/min_terminated_length": 720.0, "epoch": 0.8901780356071214, "frac_reward_zero_std": 0.0, "grad_norm": 3.5276932159034815, "kl": 0.017181396484375, "learning_rate": 1.3278165402220787e-07, "loss": -0.0286, "num_tokens": 194920295.0, "reward": -2.9802322387695312e-08, "reward_std": 0.48202264308929443, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13220819406391135, "rewards/wordcountpos_reward/raw_geo/std": 0.27063473733311477, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1230.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 1053.75, "completions/mean_terminated_length": 1053.75, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.890378075615123, "frac_reward_zero_std": 0.0, "grad_norm": 3.5214432281463344, "kl": 0.0164337158203125, "learning_rate": 1.326640185950627e-07, "loss": 0.0062, "num_tokens": 194960003.0, "reward": 2.9802322387695312e-08, "reward_std": 0.4129185080528259, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07329363696983401, "rewards/wordcountpos_reward/raw_geo/std": 0.12511159898746732, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952264, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1231.1875, "completions/mean_terminated_length": 1169.1539306640625, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.8905781156231246, "frac_reward_zero_std": 0.0, "grad_norm": 2.485586979609979, "kl": 0.0151824951171875, "learning_rate": 1.325465866628372e-07, "loss": -0.0978, "num_tokens": 195013950.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0319173336029053, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.029039189691761568, "rewards/wordcountpos_reward/raw_geo/std": 0.0706204821160714, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.12412657816683505, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1323.0, "completions/max_terminated_length": 1323.0, "completions/mean_length": 1010.6875, "completions/mean_terminated_length": 1010.6875, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.8907781556311263, "frac_reward_zero_std": 0.0, "grad_norm": 3.3216961934482594, "kl": 0.0206298828125, "learning_rate": 1.3242935828279161e-07, "loss": 0.0115, "num_tokens": 195063257.0, "reward": -7.450580596923828e-09, "reward_std": 1.062157154083252, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.06986513531366792, "rewards/wordcountpos_reward/raw_geo/std": 0.21372506515787965, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.1134476547592341, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1228.3125, "completions/mean_terminated_length": 1104.8182373046875, "completions/min_length": 812.0, "completions/min_terminated_length": 812.0, "epoch": 0.8909781956391278, "frac_reward_zero_std": 0.0, "grad_norm": 2.768803776283987, "kl": 0.0128936767578125, "learning_rate": 1.3231233351208702e-07, "loss": -0.1189, "num_tokens": 195113102.0, "reward": 2.9802322387695312e-08, "reward_std": 0.772198498249054, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03304311896465161, "rewards/wordcountpos_reward/raw_geo/std": 0.13502780529968883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 1059.125, "completions/mean_terminated_length": 1059.125, "completions/min_length": 880.0, "completions/min_terminated_length": 880.0, "epoch": 0.8911782356471294, "frac_reward_zero_std": 0.0, "grad_norm": 3.029150197287015, "kl": 0.0157012939453125, "learning_rate": 1.3219551240778528e-07, "loss": 0.0056, "num_tokens": 195152112.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8359369039535522, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.032206219864977514, "rewards/wordcountpos_reward/raw_geo/std": 0.05868188756571957, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 1.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1265.0, "completions/mean_length": 1050.875, "completions/mean_terminated_length": 1020.9334106445312, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.8913782756551311, "frac_reward_zero_std": 0.0, "grad_norm": 3.523015495972121, "kl": 0.018890380859375, "learning_rate": 1.3207889502684906e-07, "loss": -0.0177, "num_tokens": 195202182.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0125062465667725, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01737032348032201, "rewards/wordcountpos_reward/raw_geo/std": 0.05061686990454266, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620104, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1184.75, "completions/mean_terminated_length": 1041.45458984375, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.8915783156631326, "frac_reward_zero_std": 0.0, "grad_norm": 3.17813256778795, "kl": 0.016876220703125, "learning_rate": 1.3196248142614122e-07, "loss": -0.0128, "num_tokens": 195247794.0, "reward": 0.0, "reward_std": 0.47954583168029785, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2079353815536203, "rewards/wordcountpos_reward/raw_geo/std": 0.1257982407830174, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1124.6875, "completions/mean_terminated_length": 1099.666748046875, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.8917783556711343, "frac_reward_zero_std": 0.0, "grad_norm": 3.0576174524387167, "kl": 0.0173797607421875, "learning_rate": 1.318462716624257e-07, "loss": -0.0481, "num_tokens": 195298173.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9552980065345764, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06368530665030982, "rewards/wordcountpos_reward/raw_geo/std": 0.0579295826226621, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1041.4375, "completions/mean_terminated_length": 1010.86669921875, "completions/min_length": 622.0, "completions/min_terminated_length": 622.0, "epoch": 0.8919783956791358, "frac_reward_zero_std": 0.0, "grad_norm": 3.5645837438980945, "kl": 0.0147552490234375, "learning_rate": 1.3173026579236701e-07, "loss": -0.0037, "num_tokens": 195336676.0, "reward": 0.0, "reward_std": 0.9566646218299866, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3104289270433629, "rewards/wordcountpos_reward/raw_geo/std": 0.139045197077581, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1257.0, "completions/max_terminated_length": 1257.0, "completions/mean_length": 964.0625, "completions/mean_terminated_length": 964.0625, "completions/min_length": 734.0, "completions/min_terminated_length": 734.0, "epoch": 0.8921784356871374, "frac_reward_zero_std": 0.0, "grad_norm": 3.1629039894792625, "kl": 0.016998291015625, "learning_rate": 1.3161446387252998e-07, "loss": 0.0071, "num_tokens": 195376933.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0048192739486694, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.028578762914266004, "rewards/wordcountpos_reward/raw_geo/std": 0.06735676993110669, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1051.0, "completions/mean_terminated_length": 1021.0667114257812, "completions/min_length": 724.0, "completions/min_terminated_length": 724.0, "epoch": 0.8923784756951391, "frac_reward_zero_std": 0.0, "grad_norm": 3.2788853576792087, "kl": 0.014739990234375, "learning_rate": 1.3149886595938024e-07, "loss": 0.0335, "num_tokens": 195419141.0, "reward": 0.0, "reward_std": 1.068795919418335, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08141645785709913, "rewards/wordcountpos_reward/raw_geo/std": 0.06662123050569353, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12412657816683505, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 966.1875, "completions/mean_terminated_length": 966.1875, "completions/min_length": 586.0, "completions/min_terminated_length": 586.0, "epoch": 0.8925785157031406, "frac_reward_zero_std": 0.0, "grad_norm": 3.755394305578487, "kl": 0.029205322265625, "learning_rate": 1.3138347210928374e-07, "loss": 0.0003, "num_tokens": 195467208.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9904136657714844, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08057862786333338, "rewards/wordcountpos_reward/raw_geo/std": 0.09041745927177954, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1420.375, "completions/mean_terminated_length": 1384.181884765625, "completions/min_length": 1007.0, "completions/min_terminated_length": 1007.0, "epoch": 0.8927785557111422, "frac_reward_zero_std": 0.0, "grad_norm": 2.316769999549116, "kl": 0.0108795166015625, "learning_rate": 1.3126828237850729e-07, "loss": -0.0338, "num_tokens": 195523110.0, "reward": 0.0, "reward_std": 0.7927462458610535, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07284982395752528, "rewards/wordcountpos_reward/raw_geo/std": 0.2222374759956981, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1274.5625, "completions/mean_terminated_length": 1222.5384521484375, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.8929785957191438, "frac_reward_zero_std": 0.0, "grad_norm": 2.604855496793416, "kl": 0.0138397216796875, "learning_rate": 1.3115329682321766e-07, "loss": -0.0123, "num_tokens": 195580239.0, "reward": 0.0, "reward_std": 0.6735268831253052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09770003323547469, "rewards/wordcountpos_reward/raw_geo/std": 0.1184470236769689, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1024.875, "completions/mean_terminated_length": 993.2000732421875, "completions/min_length": 542.0, "completions/min_terminated_length": 542.0, "epoch": 0.8931786357271454, "frac_reward_zero_std": 0.0, "grad_norm": 2.6260257918465695, "kl": 0.00916290283203125, "learning_rate": 1.3103851549948244e-07, "loss": -0.0456, "num_tokens": 195633245.0, "reward": 0.0, "reward_std": 0.8588553667068481, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12626570252370628, "rewards/wordcountpos_reward/raw_geo/std": 0.1429875858080879, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10886621079036349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1382.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 987.5625, "completions/mean_terminated_length": 987.5625, "completions/min_length": 784.0, "completions/min_terminated_length": 784.0, "epoch": 0.8933786757351471, "frac_reward_zero_std": 0.0, "grad_norm": 3.6389944635164375, "kl": 0.019317626953125, "learning_rate": 1.3092393846326956e-07, "loss": 0.0178, "num_tokens": 195663638.0, "reward": 0.0, "reward_std": 0.9725197553634644, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.022418699058344448, "rewards/wordcountpos_reward/raw_geo/std": 0.02636877545196447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1138550085106622, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 1093.1875, "completions/mean_terminated_length": 1093.1875, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.8935787157431486, "frac_reward_zero_std": 0.0, "grad_norm": 2.4589601297409938, "kl": 0.0095672607421875, "learning_rate": 1.3080956577044733e-07, "loss": 0.0004, "num_tokens": 195705313.0, "reward": 0.0, "reward_std": 1.0689334869384766, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15123839161375058, "rewards/wordcountpos_reward/raw_geo/std": 0.1304626009842332, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 1226.625, "completions/mean_terminated_length": 1226.625, "completions/min_length": 1142.0, "completions/min_terminated_length": 1142.0, "epoch": 0.8937787557511502, "frac_reward_zero_std": 0.0, "grad_norm": 2.881671002761142, "kl": 0.013824462890625, "learning_rate": 1.3069539747678424e-07, "loss": 0.0019, "num_tokens": 195753051.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7367368936538696, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04291003453025514, "rewards/wordcountpos_reward/raw_geo/std": 0.12945249157469021, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1139.75, "completions/mean_terminated_length": 1139.75, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.8939787957591518, "frac_reward_zero_std": 0.0, "grad_norm": 3.156669058422609, "kl": 0.016448974609375, "learning_rate": 1.305814336379494e-07, "loss": 0.0146, "num_tokens": 195802159.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5661492943763733, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06914551337547187, "rewards/wordcountpos_reward/raw_geo/std": 0.1409044565635708, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1458.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1219.75, "completions/mean_terminated_length": 1219.75, "completions/min_length": 868.0, "completions/min_terminated_length": 868.0, "epoch": 0.8941788357671534, "frac_reward_zero_std": 0.0, "grad_norm": 3.0857640554779255, "kl": 0.01666259765625, "learning_rate": 1.3046767430951202e-07, "loss": -0.003, "num_tokens": 195845811.0, "reward": 0.0, "reward_std": 1.0430270433425903, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05239828478818172, "rewards/wordcountpos_reward/raw_geo/std": 0.04626270670935488, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1153.0, "completions/max_terminated_length": 1153.0, "completions/mean_length": 835.8125, "completions/mean_terminated_length": 835.8125, "completions/min_length": 639.0, "completions/min_terminated_length": 639.0, "epoch": 0.8943788757751551, "frac_reward_zero_std": 0.0, "grad_norm": 3.638195371175314, "kl": 0.018829345703125, "learning_rate": 1.303541195469418e-07, "loss": -0.0468, "num_tokens": 195871304.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8057836294174194, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08049843471101815, "rewards/wordcountpos_reward/raw_geo/std": 0.10645055470340706, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437972, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1117.0, "completions/max_terminated_length": 1117.0, "completions/mean_length": 947.5, "completions/mean_terminated_length": 947.5, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.8945789157831566, "frac_reward_zero_std": 0.0, "grad_norm": 4.1090583936851, "kl": 0.021636962890625, "learning_rate": 1.302407694056083e-07, "loss": -0.007, "num_tokens": 195911872.0, "reward": 0.0, "reward_std": 0.7176954746246338, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.010312027673200089, "rewards/wordcountpos_reward/raw_geo/std": 0.111717753136589, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1184.25, "completions/mean_terminated_length": 1184.25, "completions/min_length": 890.0, "completions/min_terminated_length": 890.0, "epoch": 0.8947789557911582, "frac_reward_zero_std": 0.0, "grad_norm": 3.3308897178611523, "kl": 0.022216796875, "learning_rate": 1.3012762394078186e-07, "loss": 0.0016, "num_tokens": 195954596.0, "reward": 0.0, "reward_std": 0.9679096937179565, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.023067989655047288, "rewards/wordcountpos_reward/raw_geo/std": 0.06551663694241827, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 1161.0625, "completions/mean_terminated_length": 1161.0625, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.8949789957991598, "frac_reward_zero_std": 0.0, "grad_norm": 3.142288429752133, "kl": 0.0167388916015625, "learning_rate": 1.3001468320763256e-07, "loss": -0.0364, "num_tokens": 195989853.0, "reward": 2.9802322387695312e-08, "reward_std": 0.702738881111145, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04144699360224575, "rewards/wordcountpos_reward/raw_geo/std": 0.06339900853120126, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1140.5625, "completions/mean_terminated_length": 1140.5625, "completions/min_length": 779.0, "completions/min_terminated_length": 779.0, "epoch": 0.8951790358071614, "frac_reward_zero_std": 0.0, "grad_norm": 3.3300161916089066, "kl": 0.018524169921875, "learning_rate": 1.299019472612309e-07, "loss": -0.0594, "num_tokens": 196029366.0, "reward": 7.450580596923828e-09, "reward_std": 1.0635933876037598, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.13281672585382684, "rewards/wordcountpos_reward/raw_geo/std": 0.10196662006979489, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593313, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1063.75, "completions/mean_terminated_length": 1063.75, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.8953790758151631, "frac_reward_zero_std": 0.0, "grad_norm": 3.2022938040383795, "kl": 0.017791748046875, "learning_rate": 1.2978941615654717e-07, "loss": 0.0203, "num_tokens": 196069970.0, "reward": 0.0, "reward_std": 0.42684534192085266, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0246253241450995, "rewards/wordcountpos_reward/raw_geo/std": 0.16562880687119094, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.14851112939963643, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1177.25, "completions/mean_terminated_length": 1155.7333984375, "completions/min_length": 914.0, "completions/min_terminated_length": 914.0, "epoch": 0.8955791158231646, "frac_reward_zero_std": 0.0, "grad_norm": 2.6912949104984323, "kl": 0.0122528076171875, "learning_rate": 1.2967708994845247e-07, "loss": 0.0279, "num_tokens": 196118286.0, "reward": 0.0, "reward_std": 0.9670395851135254, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06039417607969273, "rewards/wordcountpos_reward/raw_geo/std": 0.13435538417162624, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.12345339501504503, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1268.0, "completions/mean_terminated_length": 1268.0, "completions/min_length": 1069.0, "completions/min_terminated_length": 1069.0, "epoch": 0.8957791558311662, "frac_reward_zero_std": 0.0, "grad_norm": 2.668019909914833, "kl": 0.0145416259765625, "learning_rate": 1.2956496869171725e-07, "loss": -0.0035, "num_tokens": 196156838.0, "reward": 0.0, "reward_std": 0.7283948659896851, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.18371975936121057, "rewards/wordcountpos_reward/raw_geo/std": 0.16773075627231007, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1214.125, "completions/mean_terminated_length": 1214.125, "completions/min_length": 995.0, "completions/min_terminated_length": 995.0, "epoch": 0.8959791958391679, "frac_reward_zero_std": 0.0, "grad_norm": 3.1091197738951557, "kl": 0.0174713134765625, "learning_rate": 1.2945305244101249e-07, "loss": 0.0014, "num_tokens": 196207384.0, "reward": 1.4901161193847656e-08, "reward_std": 1.017890214920044, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18244726695886837, "rewards/wordcountpos_reward/raw_geo/std": 0.06381960845556037, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 966.1875, "completions/mean_terminated_length": 966.1875, "completions/min_length": 571.0, "completions/min_terminated_length": 571.0, "epoch": 0.8961792358471694, "frac_reward_zero_std": 0.0, "grad_norm": 3.4228427898845393, "kl": 0.0146942138671875, "learning_rate": 1.29341341250909e-07, "loss": -0.0688, "num_tokens": 196254043.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0223793983459473, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12602391810982386, "rewards/wordcountpos_reward/raw_geo/std": 0.08928896425586275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455328, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1267.4375, "completions/mean_terminated_length": 1234.21435546875, "completions/min_length": 592.0, "completions/min_terminated_length": 592.0, "epoch": 0.8963792758551711, "frac_reward_zero_std": 0.0, "grad_norm": 3.119129823108601, "kl": 0.0206298828125, "learning_rate": 1.292298351758778e-07, "loss": 0.0146, "num_tokens": 196302402.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9844325184822083, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.049261243267245146, "rewards/wordcountpos_reward/raw_geo/std": 0.06868592206872455, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466156, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1235.625, "completions/mean_terminated_length": 1197.857177734375, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.8965793158631726, "frac_reward_zero_std": 0.0, "grad_norm": 2.7887593046011654, "kl": 0.0140533447265625, "learning_rate": 1.2911853427028952e-07, "loss": -0.0085, "num_tokens": 196349012.0, "reward": 0.0, "reward_std": 0.9329230785369873, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11691479674432491, "rewards/wordcountpos_reward/raw_geo/std": 0.18529477488771123, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.1, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1147.375, "completions/mean_terminated_length": 1147.375, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.8967793558711742, "frac_reward_zero_std": 0.0, "grad_norm": 2.9319772700502438, "kl": 0.0165863037109375, "learning_rate": 1.290074385884151e-07, "loss": 0.0272, "num_tokens": 196393106.0, "reward": 0.0, "reward_std": 0.6510517597198486, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06198900317945391, "rewards/wordcountpos_reward/raw_geo/std": 0.22538598871713228, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1225.1875, "completions/mean_terminated_length": 1206.86669921875, "completions/min_length": 926.0, "completions/min_terminated_length": 926.0, "epoch": 0.8969793958791759, "frac_reward_zero_std": 0.0, "grad_norm": 2.438698967435072, "kl": 0.0120391845703125, "learning_rate": 1.2889654818442535e-07, "loss": -0.0116, "num_tokens": 196438301.0, "reward": -1.4901161193847656e-08, "reward_std": 1.010681390762329, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03698356176003161, "rewards/wordcountpos_reward/raw_geo/std": 0.11374795612810507, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1182.0, "completions/mean_terminated_length": 1160.800048828125, "completions/min_length": 894.0, "completions/min_terminated_length": 894.0, "epoch": 0.8971794358871774, "frac_reward_zero_std": 0.0, "grad_norm": 3.292350632809605, "kl": 0.02008056640625, "learning_rate": 1.2878586311239098e-07, "loss": 0.0121, "num_tokens": 196486469.0, "reward": 0.0, "reward_std": 0.6629685163497925, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.19567146774225055, "rewards/wordcountpos_reward/raw_geo/std": 0.30906672603110275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1197.5, "completions/mean_terminated_length": 1197.5, "completions/min_length": 1053.0, "completions/min_terminated_length": 1053.0, "epoch": 0.8973794758951791, "frac_reward_zero_std": 0.0, "grad_norm": 3.1718078945854216, "kl": 0.0198974609375, "learning_rate": 1.2867538342628235e-07, "loss": 0.0027, "num_tokens": 196536517.0, "reward": 7.450580596923828e-09, "reward_std": 1.0533549785614014, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.02084281348192286, "rewards/wordcountpos_reward/raw_geo/std": 0.06417827592920736, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.0718795288428261, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1281.0, "completions/max_terminated_length": 1281.0, "completions/mean_length": 968.0, "completions/mean_terminated_length": 968.0, "completions/min_length": 747.0, "completions/min_terminated_length": 747.0, "epoch": 0.8975795159031806, "frac_reward_zero_std": 0.0, "grad_norm": 3.2995986722051356, "kl": 0.0135650634765625, "learning_rate": 1.2856510917996995e-07, "loss": 0.0238, "num_tokens": 196579005.0, "reward": 0.0, "reward_std": 0.825952410697937, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0759953791927275, "rewards/wordcountpos_reward/raw_geo/std": 0.08072672121740336, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1239.8125, "completions/mean_terminated_length": 1222.4666748046875, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.8977795559111822, "frac_reward_zero_std": 0.0, "grad_norm": 2.5172762826009745, "kl": 0.0128326416015625, "learning_rate": 1.2845504042722402e-07, "loss": 0.009, "num_tokens": 196631586.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9611141085624695, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024919509254733078, "rewards/wordcountpos_reward/raw_geo/std": 0.053061993834100804, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 1051.875, "completions/mean_terminated_length": 1051.875, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.8979795959191839, "frac_reward_zero_std": 0.0, "grad_norm": 3.6088501479232, "kl": 0.018890380859375, "learning_rate": 1.2834517722171464e-07, "loss": 0.0116, "num_tokens": 196665720.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5492627024650574, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18716270126120918, "rewards/wordcountpos_reward/raw_geo/std": 0.08518993427492709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1337.125, "completions/mean_terminated_length": 1313.857177734375, "completions/min_length": 1084.0, "completions/min_terminated_length": 1084.0, "epoch": 0.8981796359271854, "frac_reward_zero_std": 0.0, "grad_norm": 2.5338785191230118, "kl": 0.01153564453125, "learning_rate": 1.2823551961701148e-07, "loss": -0.0013, "num_tokens": 196721410.0, "reward": 4.470348358154297e-08, "reward_std": 0.9782317876815796, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05926105985920477, "rewards/wordcountpos_reward/raw_geo/std": 0.04455686776040281, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 999.375, "completions/mean_terminated_length": 999.375, "completions/min_length": 693.0, "completions/min_terminated_length": 693.0, "epoch": 0.8983796759351871, "frac_reward_zero_std": 0.0, "grad_norm": 3.662263239956439, "kl": 0.016845703125, "learning_rate": 1.2812606766658414e-07, "loss": -0.0211, "num_tokens": 196754800.0, "reward": 0.0, "reward_std": 1.0223361253738403, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.058618894933973244, "rewards/wordcountpos_reward/raw_geo/std": 0.0619975406406767, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575908, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1114.625, "completions/mean_terminated_length": 1114.625, "completions/min_length": 769.0, "completions/min_terminated_length": 769.0, "epoch": 0.8985797159431886, "frac_reward_zero_std": 0.0, "grad_norm": 2.401194049977557, "kl": 0.01343536376953125, "learning_rate": 1.280168214238019e-07, "loss": -0.0268, "num_tokens": 196802634.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0524804592132568, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1349568609081765, "rewards/wordcountpos_reward/raw_geo/std": 0.05097990316273567, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09737289911202954, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1064.0, "completions/mean_terminated_length": 1034.933349609375, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.8987797559511902, "frac_reward_zero_std": 0.0, "grad_norm": 3.476681377761633, "kl": 0.017791748046875, "learning_rate": 1.279077809419338e-07, "loss": 0.0145, "num_tokens": 196841226.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9379866719245911, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04976930037818787, "rewards/wordcountpos_reward/raw_geo/std": 0.21452526022485488, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898339, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1112.4375, "completions/mean_terminated_length": 1086.60009765625, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.8989797959591919, "frac_reward_zero_std": 0.0, "grad_norm": 2.680412666115359, "kl": 0.0140533447265625, "learning_rate": 1.277989462741482e-07, "loss": 0.0039, "num_tokens": 196892049.0, "reward": 0.0, "reward_std": 0.6454829573631287, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13609625451891383, "rewards/wordcountpos_reward/raw_geo/std": 0.1244265111776894, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509011, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1083.3125, "completions/mean_terminated_length": 1055.533447265625, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.8991798359671934, "frac_reward_zero_std": 0.0, "grad_norm": 3.511716269528534, "kl": 0.0154876708984375, "learning_rate": 1.2769031747351377e-07, "loss": 0.0077, "num_tokens": 196932286.0, "reward": 0.0, "reward_std": 0.7260481715202332, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.003115610248397989, "rewards/wordcountpos_reward/raw_geo/std": 0.06481354163971939, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327551, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 1290.375, "completions/mean_terminated_length": 1080.75, "completions/min_length": 669.0, "completions/min_terminated_length": 669.0, "epoch": 0.8993798759751951, "frac_reward_zero_std": 0.0, "grad_norm": 3.2261641682064113, "kl": 0.019195556640625, "learning_rate": 1.2758189459299813e-07, "loss": -0.0526, "num_tokens": 196991764.0, "reward": -2.9802322387695312e-08, "reward_std": 0.974234402179718, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.030316399003731745, "rewards/wordcountpos_reward/raw_geo/std": 0.11036777883635099, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1177.0, "completions/max_terminated_length": 1177.0, "completions/mean_length": 945.5, "completions/mean_terminated_length": 945.5, "completions/min_length": 565.0, "completions/min_terminated_length": 565.0, "epoch": 0.8995799159831966, "frac_reward_zero_std": 0.0, "grad_norm": 3.760506653946858, "kl": 0.018951416015625, "learning_rate": 1.274736776854688e-07, "loss": -0.0125, "num_tokens": 197033516.0, "reward": 0.0, "reward_std": 1.0381752252578735, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.19277859309389514, "rewards/wordcountpos_reward/raw_geo/std": 0.15603867447983263, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466154, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1417.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1260.75, "completions/mean_terminated_length": 1260.75, "completions/min_length": 1065.0, "completions/min_terminated_length": 1065.0, "epoch": 0.8997799559911982, "frac_reward_zero_std": 0.0, "grad_norm": 2.235664464239749, "kl": 0.0122833251953125, "learning_rate": 1.2736566680369293e-07, "loss": 0.0163, "num_tokens": 197076880.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8338193297386169, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015631487527470997, "rewards/wordcountpos_reward/raw_geo/std": 0.08356756128104051, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1043.25, "completions/mean_terminated_length": 1043.25, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.8999799959991999, "frac_reward_zero_std": 0.0, "grad_norm": 3.2277613743780313, "kl": 0.01678466796875, "learning_rate": 1.272578620003371e-07, "loss": -0.0374, "num_tokens": 197116908.0, "reward": 0.0, "reward_std": 1.040210247039795, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10269149916493642, "rewards/wordcountpos_reward/raw_geo/std": 0.09360995735599159, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1300.0, "completions/max_terminated_length": 1300.0, "completions/mean_length": 1062.5, "completions/mean_terminated_length": 1062.5, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.9001800360072014, "frac_reward_zero_std": 0.0, "grad_norm": 3.1399109462282215, "kl": 0.01314544677734375, "learning_rate": 1.2715026332796737e-07, "loss": 0.0315, "num_tokens": 197160172.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7759186625480652, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.23420673339755604, "rewards/wordcountpos_reward/raw_geo/std": 0.3958799181799324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1258305739211792, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1116.5, "completions/mean_terminated_length": 1090.933349609375, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.9003800760152031, "frac_reward_zero_std": 0.0, "grad_norm": 3.467746875072097, "kl": 0.0157623291015625, "learning_rate": 1.2704287083904933e-07, "loss": 0.0179, "num_tokens": 197210908.0, "reward": 0.0, "reward_std": 0.7348229885101318, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1349712009782543, "rewards/wordcountpos_reward/raw_geo/std": 0.1403471964253292, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1116.375, "completions/mean_terminated_length": 1116.375, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.9005801160232046, "frac_reward_zero_std": 0.0, "grad_norm": 3.0933464135719095, "kl": 0.018768310546875, "learning_rate": 1.2693568458594808e-07, "loss": -0.0021, "num_tokens": 197260858.0, "reward": -4.470348358154297e-08, "reward_std": 1.0126967430114746, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.058901782128385, "rewards/wordcountpos_reward/raw_geo/std": 0.042793288212831605, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1169.75, "completions/mean_terminated_length": 1122.571533203125, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.9007801560312062, "frac_reward_zero_std": 0.0, "grad_norm": 3.235474630757732, "kl": 0.017364501953125, "learning_rate": 1.268287046209282e-07, "loss": -0.0807, "num_tokens": 197316486.0, "reward": -4.470348358154297e-08, "reward_std": 0.9473079442977905, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11798622164503063, "rewards/wordcountpos_reward/raw_geo/std": 0.11293860039453972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1133.0, "completions/max_terminated_length": 1133.0, "completions/mean_length": 961.5625, "completions/mean_terminated_length": 961.5625, "completions/min_length": 763.0, "completions/min_terminated_length": 763.0, "epoch": 0.9009801960392079, "frac_reward_zero_std": 0.0, "grad_norm": 3.6410622931180923, "kl": 0.01800537109375, "learning_rate": 1.2672193099615347e-07, "loss": -0.0091, "num_tokens": 197357783.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9463679194450378, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.027778180713813418, "rewards/wordcountpos_reward/raw_geo/std": 0.06653121518539104, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 1107.875, "completions/mean_terminated_length": 1107.875, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.9011802360472094, "frac_reward_zero_std": 0.0, "grad_norm": 2.5440429155253184, "kl": 0.0131072998046875, "learning_rate": 1.2661536376368723e-07, "loss": -0.0191, "num_tokens": 197396605.0, "reward": 0.0, "reward_std": 0.7810201644897461, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.050953371330545595, "rewards/wordcountpos_reward/raw_geo/std": 0.05960773256792437, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 1368.0625, "completions/mean_terminated_length": 1236.125, "completions/min_length": 1115.0, "completions/min_terminated_length": 1115.0, "epoch": 0.9013802760552111, "frac_reward_zero_std": 0.0, "grad_norm": 2.968173064997409, "kl": 0.0133819580078125, "learning_rate": 1.2650900297549216e-07, "loss": 0.011, "num_tokens": 197440486.0, "reward": 0.0, "reward_std": 0.8736310005187988, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08214005707059038, "rewards/wordcountpos_reward/raw_geo/std": 0.06989666271668146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1353.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 1147.5, "completions/mean_terminated_length": 1147.5, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.9015803160632127, "frac_reward_zero_std": 0.0, "grad_norm": 3.3356269091160136, "kl": 0.0180206298828125, "learning_rate": 1.2640284868343033e-07, "loss": -0.0383, "num_tokens": 197482854.0, "reward": 0.0, "reward_std": 0.8465081453323364, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10837199208383898, "rewards/wordcountpos_reward/raw_geo/std": 0.05193807314718477, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1295.0, "completions/max_terminated_length": 1295.0, "completions/mean_length": 1080.0, "completions/mean_terminated_length": 1080.0, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.9017803560712142, "frac_reward_zero_std": 0.0, "grad_norm": 2.986536703304673, "kl": 0.01409912109375, "learning_rate": 1.2629690093926297e-07, "loss": -0.0314, "num_tokens": 197529070.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9422441720962524, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13140277194987132, "rewards/wordcountpos_reward/raw_geo/std": 0.1565792189280679, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.10809803506625448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1277.75, "completions/mean_terminated_length": 1262.933349609375, "completions/min_length": 1075.0, "completions/min_terminated_length": 1075.0, "epoch": 0.9019803960792159, "frac_reward_zero_std": 0.0, "grad_norm": 2.9742476401325733, "kl": 0.017974853515625, "learning_rate": 1.2619115979465067e-07, "loss": 0.0335, "num_tokens": 197566338.0, "reward": 0.0, "reward_std": 1.064551591873169, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2897467414874019, "rewards/wordcountpos_reward/raw_geo/std": 0.07086606944279948, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11279282877125756, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1257.875, "completions/mean_terminated_length": 1147.8182373046875, "completions/min_length": 924.0, "completions/min_terminated_length": 924.0, "epoch": 0.9021804360872174, "frac_reward_zero_std": 0.0, "grad_norm": 2.5042250914848005, "kl": 0.01239013671875, "learning_rate": 1.2608562530115338e-07, "loss": -0.0242, "num_tokens": 197619896.0, "reward": -7.450580596923828e-09, "reward_std": 1.052369475364685, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.00028650029319673975, "rewards/wordcountpos_reward/raw_geo/std": 0.2724851836388383, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1253.375, "completions/mean_terminated_length": 1196.4615478515625, "completions/min_length": 591.0, "completions/min_terminated_length": 591.0, "epoch": 0.9023804760952191, "frac_reward_zero_std": 0.0, "grad_norm": 2.8119106395266518, "kl": 0.01666259765625, "learning_rate": 1.259802975102302e-07, "loss": -0.0856, "num_tokens": 197672886.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9255926012992859, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012563964161263005, "rewards/wordcountpos_reward/raw_geo/std": 0.18290534293963234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1230.5625, "completions/mean_terminated_length": 1192.071533203125, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.9025805161032207, "frac_reward_zero_std": 0.0, "grad_norm": 2.5304271968658285, "kl": 0.0159759521484375, "learning_rate": 1.2587517647323922e-07, "loss": -0.0023, "num_tokens": 197724527.0, "reward": -1.4901161193847656e-08, "reward_std": 1.001382827758789, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.28033168125426755, "rewards/wordcountpos_reward/raw_geo/std": 0.12837269131564927, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362769, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1148.25, "completions/mean_terminated_length": 1067.076904296875, "completions/min_length": 882.0, "completions/min_terminated_length": 882.0, "epoch": 0.9027805561112222, "frac_reward_zero_std": 0.0, "grad_norm": 2.6796945864168626, "kl": 0.0143585205078125, "learning_rate": 1.257702622414382e-07, "loss": 0.0175, "num_tokens": 197770779.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6017957925796509, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.054239404515703804, "rewards/wordcountpos_reward/raw_geo/std": 0.14933006613121816, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382574, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 1099.125, "completions/mean_terminated_length": 1099.125, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.9029805961192239, "frac_reward_zero_std": 0.0, "grad_norm": 3.1360144241895282, "kl": 0.015380859375, "learning_rate": 1.2566555486598378e-07, "loss": 0.0122, "num_tokens": 197804565.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9413121938705444, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.028554125394447846, "rewards/wordcountpos_reward/raw_geo/std": 0.1427161729895929, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1161.4375, "completions/mean_terminated_length": 1161.4375, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.9031806361272254, "frac_reward_zero_std": 0.0, "grad_norm": 2.4658214648472576, "kl": 0.0180816650390625, "learning_rate": 1.2556105439793156e-07, "loss": -0.0376, "num_tokens": 197846756.0, "reward": 7.450580596923828e-09, "reward_std": 1.0599298477172852, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.015304228236992215, "rewards/wordcountpos_reward/raw_geo/std": 0.07645670301553487, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07876359377087681, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1116.375, "completions/mean_terminated_length": 1027.84619140625, "completions/min_length": 776.0, "completions/min_terminated_length": 776.0, "epoch": 0.9033806761352271, "frac_reward_zero_std": 0.0, "grad_norm": 3.234146536998606, "kl": 0.0177154541015625, "learning_rate": 1.2545676088823656e-07, "loss": -0.0432, "num_tokens": 197888522.0, "reward": 4.470348358154297e-08, "reward_std": 0.9993795156478882, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.061424344226634, "rewards/wordcountpos_reward/raw_geo/std": 0.06346574880974774, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11122216672215288, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 1253.9375, "completions/mean_terminated_length": 1237.533447265625, "completions/min_length": 975.0, "completions/min_terminated_length": 975.0, "epoch": 0.9035807161432287, "frac_reward_zero_std": 0.0, "grad_norm": 3.0616917262386063, "kl": 0.014434814453125, "learning_rate": 1.2535267438775276e-07, "loss": -0.0414, "num_tokens": 197943081.0, "reward": 0.0, "reward_std": 0.5775501728057861, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3014929737105944, "rewards/wordcountpos_reward/raw_geo/std": 0.3264720603945404, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1340.25, "completions/mean_terminated_length": 1244.4000244140625, "completions/min_length": 680.0, "completions/min_terminated_length": 680.0, "epoch": 0.9037807561512302, "frac_reward_zero_std": 0.0, "grad_norm": 3.0383191178274402, "kl": 0.0161895751953125, "learning_rate": 1.2524879494723327e-07, "loss": -0.0097, "num_tokens": 197998381.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7991588115692139, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05042159474136328, "rewards/wordcountpos_reward/raw_geo/std": 0.11295889174470054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1374.0, "completions/max_terminated_length": 1374.0, "completions/mean_length": 1180.5625, "completions/mean_terminated_length": 1180.5625, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.9039807961592319, "frac_reward_zero_std": 0.0, "grad_norm": 2.9522438288887636, "kl": 0.014190673828125, "learning_rate": 1.2514512261733002e-07, "loss": 0.0102, "num_tokens": 198052006.0, "reward": 0.0, "reward_std": 0.3429315686225891, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.24708745510266567, "rewards/wordcountpos_reward/raw_geo/std": 0.22484843705871566, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1253.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 993.0625, "completions/mean_terminated_length": 993.0625, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.9041808361672334, "frac_reward_zero_std": 0.0, "grad_norm": 2.7783988184059023, "kl": 0.01403045654296875, "learning_rate": 1.250416574485943e-07, "loss": -0.0049, "num_tokens": 198090423.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9564999341964722, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03758059693317247, "rewards/wordcountpos_reward/raw_geo/std": 0.05207336062725898, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1123.875, "completions/mean_terminated_length": 1123.875, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.904380876175235, "frac_reward_zero_std": 0.0, "grad_norm": 2.7794761364629528, "kl": 0.0175018310546875, "learning_rate": 1.2493839949147607e-07, "loss": -0.039, "num_tokens": 198137949.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0160727500915527, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1573651452354914, "rewards/wordcountpos_reward/raw_geo/std": 0.1067641959501508, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1162.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 944.9375, "completions/mean_terminated_length": 944.9375, "completions/min_length": 793.0, "completions/min_terminated_length": 793.0, "epoch": 0.9045809161832367, "frac_reward_zero_std": 0.0, "grad_norm": 3.837803364683008, "kl": 0.017974853515625, "learning_rate": 1.2483534879632446e-07, "loss": 0.014, "num_tokens": 198179260.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6394060254096985, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.26033312726347585, "rewards/wordcountpos_reward/raw_geo/std": 0.28378494235474816, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10878112581387146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1130.0, "completions/mean_terminated_length": 1130.0, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.9047809561912382, "frac_reward_zero_std": 0.0, "grad_norm": 3.6939098316123324, "kl": 0.02020263671875, "learning_rate": 1.247325054133873e-07, "loss": -0.0264, "num_tokens": 198224132.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8453148007392883, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.2410319339704779, "rewards/wordcountpos_reward/raw_geo/std": 0.3217339358868181, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1197.0, "completions/max_terminated_length": 1197.0, "completions/mean_length": 910.8125, "completions/mean_terminated_length": 910.8125, "completions/min_length": 722.0, "completions/min_terminated_length": 722.0, "epoch": 0.9049809961992399, "frac_reward_zero_std": 0.0, "grad_norm": 3.4012841435476293, "kl": 0.01629638671875, "learning_rate": 1.2462986939281166e-07, "loss": -0.0226, "num_tokens": 198273001.0, "reward": 0.0, "reward_std": 0.529931366443634, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13259176977804743, "rewards/wordcountpos_reward/raw_geo/std": 0.1515888134673905, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11344765475923412, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1218.0, "completions/max_terminated_length": 1218.0, "completions/mean_length": 1011.4375, "completions/mean_terminated_length": 1011.4375, "completions/min_length": 814.0, "completions/min_terminated_length": 814.0, "epoch": 0.9051810362072414, "frac_reward_zero_std": 0.0, "grad_norm": 2.8971381994046728, "kl": 0.0121612548828125, "learning_rate": 1.245274407846433e-07, "loss": -0.0262, "num_tokens": 198312728.0, "reward": 0.0, "reward_std": 0.4250006079673767, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12103658095744964, "rewards/wordcountpos_reward/raw_geo/std": 0.21408974447143914, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457553, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1094.6875, "completions/mean_terminated_length": 1067.666748046875, "completions/min_length": 869.0, "completions/min_terminated_length": 869.0, "epoch": 0.905381076215243, "frac_reward_zero_std": 0.0, "grad_norm": 3.142644208622096, "kl": 0.016876220703125, "learning_rate": 1.2442521963882677e-07, "loss": -0.0052, "num_tokens": 198364203.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9705259799957275, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.018036289124630515, "rewards/wordcountpos_reward/raw_geo/std": 0.17161070855772761, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1265.75, "completions/mean_terminated_length": 1250.1334228515625, "completions/min_length": 1057.0, "completions/min_terminated_length": 1057.0, "epoch": 0.9055811162232447, "frac_reward_zero_std": 0.0, "grad_norm": 3.058109472139607, "kl": 0.0160980224609375, "learning_rate": 1.2432320600520568e-07, "loss": 0.0169, "num_tokens": 198412999.0, "reward": 0.0, "reward_std": 0.9944441318511963, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10254705255323553, "rewards/wordcountpos_reward/raw_geo/std": 0.11588216193742923, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 944.5, "completions/mean_terminated_length": 944.5, "completions/min_length": 552.0, "completions/min_terminated_length": 552.0, "epoch": 0.9057811562312462, "frac_reward_zero_std": 0.0, "grad_norm": 3.3077793143510443, "kl": 0.017822265625, "learning_rate": 1.2422139993352235e-07, "loss": -0.0335, "num_tokens": 198447951.0, "reward": 1.4901161193847656e-08, "reward_std": 1.021714448928833, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10534659935652196, "rewards/wordcountpos_reward/raw_geo/std": 0.33972233999716234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1211.4375, "completions/mean_terminated_length": 1192.2000732421875, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.9059811962392479, "frac_reward_zero_std": 0.0, "grad_norm": 2.993918057523646, "kl": 0.01544189453125, "learning_rate": 1.2411980147341777e-07, "loss": -0.0223, "num_tokens": 198497926.0, "reward": 0.0, "reward_std": 0.7586450576782227, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.054793515609267485, "rewards/wordcountpos_reward/raw_geo/std": 0.1817823754927932, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1067.9375, "completions/mean_terminated_length": 1067.9375, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.9061812362472494, "frac_reward_zero_std": 0.0, "grad_norm": 3.6247350244462644, "kl": 0.038177490234375, "learning_rate": 1.2401841067443188e-07, "loss": 0.0277, "num_tokens": 198546365.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9461328387260437, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.191963804962094, "rewards/wordcountpos_reward/raw_geo/std": 0.058263381151237977, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1288.6875, "completions/mean_terminated_length": 1239.923095703125, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.906381276255251, "frac_reward_zero_std": 0.0, "grad_norm": 3.245127550263272, "kl": 0.018768310546875, "learning_rate": 1.2391722758600327e-07, "loss": 0.0085, "num_tokens": 198601040.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9051002264022827, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1121199757373623, "rewards/wordcountpos_reward/raw_geo/std": 0.13199350047447597, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8, "rewards/wordcountpos_reward/raw_rule/std": 0.05962847939999442, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1200.6875, "completions/mean_terminated_length": 1200.6875, "completions/min_length": 755.0, "completions/min_terminated_length": 755.0, "epoch": 0.9065813162632527, "frac_reward_zero_std": 0.0, "grad_norm": 3.0150660012159447, "kl": 0.018707275390625, "learning_rate": 1.2381625225746937e-07, "loss": -0.0385, "num_tokens": 198641363.0, "reward": 1.4901161193847656e-08, "reward_std": 0.954128086566925, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05314404098057059, "rewards/wordcountpos_reward/raw_geo/std": 0.13240040499034447, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460886, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 1224.25, "completions/mean_terminated_length": 1205.86669921875, "completions/min_length": 1032.0, "completions/min_terminated_length": 1032.0, "epoch": 0.9067813562712542, "frac_reward_zero_std": 0.0, "grad_norm": 3.5296307046798994, "kl": 0.021697998046875, "learning_rate": 1.2371548473806602e-07, "loss": -0.026, "num_tokens": 198684807.0, "reward": 0.0, "reward_std": 0.7737401723861694, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05047100998336901, "rewards/wordcountpos_reward/raw_geo/std": 0.16977551404572974, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1082.8125, "completions/mean_terminated_length": 1055.0, "completions/min_length": 715.0, "completions/min_terminated_length": 715.0, "epoch": 0.9069813962792559, "frac_reward_zero_std": 0.0, "grad_norm": 2.7391796415844696, "kl": 0.01483154296875, "learning_rate": 1.236149250769281e-07, "loss": -0.0164, "num_tokens": 198720636.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9308068156242371, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.019664470950205477, "rewards/wordcountpos_reward/raw_geo/std": 0.03949073616192054, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333331, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 1201.8125, "completions/mean_terminated_length": 1159.21435546875, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.9071814362872574, "frac_reward_zero_std": 0.0, "grad_norm": 2.9916832103835747, "kl": 0.0163421630859375, "learning_rate": 1.2351457332308882e-07, "loss": -0.042, "num_tokens": 198772569.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7566127777099609, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15542799260398552, "rewards/wordcountpos_reward/raw_geo/std": 0.09468869767044817, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.103905227473387, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1048.625, "completions/mean_terminated_length": 1048.625, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.907381476295259, "frac_reward_zero_std": 0.0, "grad_norm": 3.426512779158223, "kl": 0.017974853515625, "learning_rate": 1.2341442952548033e-07, "loss": 0.0196, "num_tokens": 198804003.0, "reward": 0.0, "reward_std": 0.7445440292358398, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07969942758844802, "rewards/wordcountpos_reward/raw_geo/std": 0.08538697996364927, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09016445879408153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1225.9375, "completions/mean_terminated_length": 1225.9375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.9075815163032607, "frac_reward_zero_std": 0.0, "grad_norm": 3.3215990042558254, "kl": 0.021392822265625, "learning_rate": 1.23314493732933e-07, "loss": 0.0137, "num_tokens": 198850218.0, "reward": 0.0, "reward_std": 0.6974668502807617, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.011478578727066799, "rewards/wordcountpos_reward/raw_geo/std": 0.05211798078906457, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1175.875, "completions/mean_terminated_length": 1175.875, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.9077815563112622, "frac_reward_zero_std": 0.0, "grad_norm": 3.5117519252114286, "kl": 0.021270751953125, "learning_rate": 1.232147659941761e-07, "loss": -0.001, "num_tokens": 198894152.0, "reward": 0.0, "reward_std": 1.0001753568649292, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07166333417887358, "rewards/wordcountpos_reward/raw_geo/std": 0.11592876217186493, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1245.0, "completions/max_terminated_length": 1245.0, "completions/mean_length": 944.875, "completions/mean_terminated_length": 944.875, "completions/min_length": 768.0, "completions/min_terminated_length": 768.0, "epoch": 0.9079815963192639, "frac_reward_zero_std": 0.0, "grad_norm": 3.721628634047811, "kl": 0.019989013671875, "learning_rate": 1.2311524635783724e-07, "loss": -0.0174, "num_tokens": 198928302.0, "reward": 0.0, "reward_std": 1.0251308679580688, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.040459640765292286, "rewards/wordcountpos_reward/raw_geo/std": 0.14940131933793058, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1175.75, "completions/mean_terminated_length": 1175.75, "completions/min_length": 905.0, "completions/min_terminated_length": 905.0, "epoch": 0.9081816363272655, "frac_reward_zero_std": 0.0, "grad_norm": 2.7673423731357882, "kl": 0.015716552734375, "learning_rate": 1.230159348724428e-07, "loss": 0.0072, "num_tokens": 198975922.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0117638111114502, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18009187655804834, "rewards/wordcountpos_reward/raw_geo/std": 0.1787497890913842, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1323.0, "completions/mean_length": 1188.875, "completions/mean_terminated_length": 1085.166748046875, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.908381676335267, "frac_reward_zero_std": 0.0, "grad_norm": 3.1336282351140405, "kl": 0.01654052734375, "learning_rate": 1.2291683158641728e-07, "loss": -0.0072, "num_tokens": 199023400.0, "reward": -1.1175870895385742e-08, "reward_std": 1.051785945892334, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12782018588104585, "rewards/wordcountpos_reward/raw_geo/std": 0.12494101615377945, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1340.6875, "completions/mean_terminated_length": 1330.0667724609375, "completions/min_length": 1104.0, "completions/min_terminated_length": 1104.0, "epoch": 0.9085817163432687, "frac_reward_zero_std": 0.0, "grad_norm": 3.1758190708977336, "kl": 0.017974853515625, "learning_rate": 1.2281793654808412e-07, "loss": -0.0133, "num_tokens": 199071267.0, "reward": 0.0, "reward_std": 1.0535410642623901, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.358007234192454, "rewards/wordcountpos_reward/raw_geo/std": 0.3024903587541043, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1188.0, "completions/max_terminated_length": 1188.0, "completions/mean_length": 978.5625, "completions/mean_terminated_length": 978.5625, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.9087817563512702, "frac_reward_zero_std": 0.0, "grad_norm": 3.0085634815849853, "kl": 0.0169677734375, "learning_rate": 1.2271924980566487e-07, "loss": -0.034, "num_tokens": 199121068.0, "reward": 0.0, "reward_std": 0.9703109264373779, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.026296713228143045, "rewards/wordcountpos_reward/raw_geo/std": 0.06686308046327184, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0894427190999916, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 1191.625, "completions/mean_terminated_length": 1120.4615478515625, "completions/min_length": 799.0, "completions/min_terminated_length": 799.0, "epoch": 0.9089817963592719, "frac_reward_zero_std": 0.0, "grad_norm": 3.2299055460783057, "kl": 0.018524169921875, "learning_rate": 1.2262077140727966e-07, "loss": -0.0506, "num_tokens": 199156766.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6917707920074463, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1358912065429266, "rewards/wordcountpos_reward/raw_geo/std": 0.2926538672447141, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1128.0, "completions/max_terminated_length": 1128.0, "completions/mean_length": 1031.25, "completions/mean_terminated_length": 1031.25, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.9091818363672735, "frac_reward_zero_std": 0.0, "grad_norm": 3.891690718748233, "kl": 0.023773193359375, "learning_rate": 1.2252250140094692e-07, "loss": -0.0354, "num_tokens": 199205226.0, "reward": -1.30385160446167e-08, "reward_std": 1.0104120969772339, "rewards/wordcountpos_reward/mean": -1.30385160446167e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08072688376888643, "rewards/wordcountpos_reward/raw_geo/std": 0.06026810482831866, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1489.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1296.375, "completions/mean_terminated_length": 1296.375, "completions/min_length": 1077.0, "completions/min_terminated_length": 1077.0, "epoch": 0.909381876375275, "frac_reward_zero_std": 0.0, "grad_norm": 2.9937874718547337, "kl": 0.014068603515625, "learning_rate": 1.2242443983458372e-07, "loss": -0.0101, "num_tokens": 199247704.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9254773855209351, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02431358350979079, "rewards/wordcountpos_reward/raw_geo/std": 0.08937708906703334, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1203.6875, "completions/mean_terminated_length": 1069.0, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.9095819163832767, "frac_reward_zero_std": 0.0, "grad_norm": 3.1694832790021796, "kl": 0.03057861328125, "learning_rate": 1.2232658675600522e-07, "loss": -0.039, "num_tokens": 199304683.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0493521690368652, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2399933529717959, "rewards/wordcountpos_reward/raw_geo/std": 0.16986151007055927, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1037.6875, "completions/mean_terminated_length": 1037.6875, "completions/min_length": 697.0, "completions/min_terminated_length": 697.0, "epoch": 0.9097819563912782, "frac_reward_zero_std": 0.0, "grad_norm": 3.618252852877744, "kl": 0.020263671875, "learning_rate": 1.22228942212925e-07, "loss": 0.0169, "num_tokens": 199352102.0, "reward": 0.0, "reward_std": 0.652031421661377, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15395859462468978, "rewards/wordcountpos_reward/raw_geo/std": 0.1631961981032231, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1078.0, "completions/max_terminated_length": 1078.0, "completions/mean_length": 946.625, "completions/mean_terminated_length": 946.625, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.9099819963992799, "frac_reward_zero_std": 0.0, "grad_norm": 3.5019719139516203, "kl": 0.02410888671875, "learning_rate": 1.2213150625295507e-07, "loss": -0.0196, "num_tokens": 199393520.0, "reward": 0.0, "reward_std": 0.40528202056884766, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.29032109623828894, "rewards/wordcountpos_reward/raw_geo/std": 0.1172110027431848, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1149.375, "completions/mean_terminated_length": 1149.375, "completions/min_length": 1002.0, "completions/min_terminated_length": 1002.0, "epoch": 0.9101820364072815, "frac_reward_zero_std": 0.0, "grad_norm": 3.4902328801472966, "kl": 0.018341064453125, "learning_rate": 1.2203427892360562e-07, "loss": -0.0046, "num_tokens": 199430198.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9129124879837036, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24811560530382637, "rewards/wordcountpos_reward/raw_geo/std": 0.08426177165468555, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.0758897836290186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 1310.875, "completions/mean_terminated_length": 1298.2667236328125, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.910382076415283, "frac_reward_zero_std": 0.0, "grad_norm": 2.996825300639923, "kl": 0.018829345703125, "learning_rate": 1.219372602722851e-07, "loss": -0.013, "num_tokens": 199482580.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7957712411880493, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3292631982743012, "rewards/wordcountpos_reward/raw_geo/std": 0.1575492593209498, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1143.0, "completions/max_terminated_length": 1143.0, "completions/mean_length": 930.1875, "completions/mean_terminated_length": 930.1875, "completions/min_length": 700.0, "completions/min_terminated_length": 700.0, "epoch": 0.9105821164232847, "frac_reward_zero_std": 0.0, "grad_norm": 3.452644651575259, "kl": 0.0153656005859375, "learning_rate": 1.2184045034630024e-07, "loss": -0.0355, "num_tokens": 199512959.0, "reward": -7.450580596923828e-09, "reward_std": 1.0252037048339844, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.041044440048615675, "rewards/wordcountpos_reward/raw_geo/std": 0.04778987316319032, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 1266.25, "completions/mean_terminated_length": 1188.3333740234375, "completions/min_length": 987.0, "completions/min_terminated_length": 987.0, "epoch": 0.9107821564312862, "frac_reward_zero_std": 0.0, "grad_norm": 2.7466872386146015, "kl": 0.0122833251953125, "learning_rate": 1.21743849192856e-07, "loss": -0.0045, "num_tokens": 199574499.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9996079206466675, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2348212702255483, "rewards/wordcountpos_reward/raw_geo/std": 0.09750023472015606, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195013, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1208.8125, "completions/mean_terminated_length": 1189.4000244140625, "completions/min_length": 1030.0, "completions/min_terminated_length": 1030.0, "epoch": 0.9109821964392879, "frac_reward_zero_std": 0.0, "grad_norm": 2.8391468559899096, "kl": 0.01247406005859375, "learning_rate": 1.2164745685905567e-07, "loss": -0.0002, "num_tokens": 199609464.0, "reward": -7.450580596923828e-09, "reward_std": 0.9634072780609131, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08160455846571027, "rewards/wordcountpos_reward/raw_geo/std": 0.05590871022676189, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1201.625, "completions/mean_terminated_length": 1201.625, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.9111822364472895, "frac_reward_zero_std": 0.0, "grad_norm": 2.8230112299430123, "kl": 0.017120361328125, "learning_rate": 1.2155127339190037e-07, "loss": 0.0117, "num_tokens": 199650594.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9619041681289673, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01424313834174179, "rewards/wordcountpos_reward/raw_geo/std": 0.0664448530046341, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715323, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 1173.25, "completions/mean_terminated_length": 1126.571533203125, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.911382276455291, "frac_reward_zero_std": 0.0, "grad_norm": 2.7244481066484907, "kl": 0.018096923828125, "learning_rate": 1.2145529883828973e-07, "loss": 0.0034, "num_tokens": 199702118.0, "reward": -2.9802322387695312e-08, "reward_std": 0.800751268863678, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.014137285522192917, "rewards/wordcountpos_reward/raw_geo/std": 0.055509196053591434, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1216.875, "completions/mean_terminated_length": 1216.875, "completions/min_length": 1065.0, "completions/min_terminated_length": 1065.0, "epoch": 0.9115823164632927, "frac_reward_zero_std": 0.0, "grad_norm": 3.3321197028351524, "kl": 0.0186767578125, "learning_rate": 1.2135953324502138e-07, "loss": 0.0383, "num_tokens": 199749444.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7904869318008423, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12609037902009063, "rewards/wordcountpos_reward/raw_geo/std": 0.13089329181084483, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08509254221575907, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1228.75, "completions/mean_terminated_length": 1105.45458984375, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.9117823564712942, "frac_reward_zero_std": 0.0, "grad_norm": 3.158735045877238, "kl": 0.018157958984375, "learning_rate": 1.2126397665879108e-07, "loss": 0.0067, "num_tokens": 199802760.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9964972734451294, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09713488044498053, "rewards/wordcountpos_reward/raw_geo/std": 0.07901991952474625, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717428, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1190.1875, "completions/mean_terminated_length": 1118.6923828125, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.9119823964792959, "frac_reward_zero_std": 0.0, "grad_norm": 3.2524175566229614, "kl": 0.0169219970703125, "learning_rate": 1.2116862912619258e-07, "loss": -0.0825, "num_tokens": 199853251.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0133785009384155, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15489613202732677, "rewards/wordcountpos_reward/raw_geo/std": 0.4265892609269343, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1203.875, "completions/mean_terminated_length": 1203.875, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.9121824364872975, "frac_reward_zero_std": 0.0, "grad_norm": 3.032594406041846, "kl": 0.015533447265625, "learning_rate": 1.2107349069371778e-07, "loss": -0.0217, "num_tokens": 199888905.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0402635335922241, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.049366676272681015, "rewards/wordcountpos_reward/raw_geo/std": 0.060322223215789464, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1031898645611484, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1426.5, "completions/mean_terminated_length": 1402.0, "completions/min_length": 1308.0, "completions/min_terminated_length": 1308.0, "epoch": 0.912382476495299, "frac_reward_zero_std": 0.0, "grad_norm": 2.59306278653199, "kl": 0.0147857666015625, "learning_rate": 1.2097856140775656e-07, "loss": 0.0033, "num_tokens": 199940761.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9445935487747192, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11126013363453316, "rewards/wordcountpos_reward/raw_geo/std": 0.04186799916317723, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07685966046898336, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1226.0, "completions/mean_terminated_length": 1186.857177734375, "completions/min_length": 956.0, "completions/min_terminated_length": 956.0, "epoch": 0.9125825165033007, "frac_reward_zero_std": 0.0, "grad_norm": 2.856351938736259, "kl": 0.0148773193359375, "learning_rate": 1.208838413145971e-07, "loss": -0.009, "num_tokens": 199979033.0, "reward": 2.9802322387695312e-08, "reward_std": 0.798927903175354, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06360200728731508, "rewards/wordcountpos_reward/raw_geo/std": 0.0806278896186646, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1157.875, "completions/mean_terminated_length": 1157.875, "completions/min_length": 1001.0, "completions/min_terminated_length": 1001.0, "epoch": 0.9127825565113022, "frac_reward_zero_std": 0.0, "grad_norm": 2.8838223570066686, "kl": 0.0135650634765625, "learning_rate": 1.2078933046042502e-07, "loss": -0.0103, "num_tokens": 200026639.0, "reward": 7.450580596923828e-09, "reward_std": 0.997371256351471, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.11651911793027404, "rewards/wordcountpos_reward/raw_geo/std": 0.07484301816550512, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1122.1875, "completions/mean_terminated_length": 1097.0, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.9129825965193039, "frac_reward_zero_std": 0.0, "grad_norm": 2.3331032795797055, "kl": 0.011199951171875, "learning_rate": 1.2069502889132457e-07, "loss": -0.0331, "num_tokens": 200070330.0, "reward": 7.450580596923828e-09, "reward_std": 0.9419071078300476, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.14364761040678314, "rewards/wordcountpos_reward/raw_geo/std": 0.1009477072398519, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.11538983843829065, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 958.8125, "completions/mean_terminated_length": 958.8125, "completions/min_length": 636.0, "completions/min_terminated_length": 636.0, "epoch": 0.9131826365273055, "frac_reward_zero_std": 0.0, "grad_norm": 3.499279623298663, "kl": 0.0162811279296875, "learning_rate": 1.206009366532774e-07, "loss": 0.0084, "num_tokens": 200109887.0, "reward": 0.0, "reward_std": 0.6438637971878052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11358047942630826, "rewards/wordcountpos_reward/raw_geo/std": 0.07450600363894191, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1270.0, "completions/max_terminated_length": 1270.0, "completions/mean_length": 1017.875, "completions/mean_terminated_length": 1017.875, "completions/min_length": 707.0, "completions/min_terminated_length": 707.0, "epoch": 0.913382676535307, "frac_reward_zero_std": 0.0, "grad_norm": 4.047815915062073, "kl": 0.02545166015625, "learning_rate": 1.2050705379216344e-07, "loss": -0.0348, "num_tokens": 200150669.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0329232215881348, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06918795604883933, "rewards/wordcountpos_reward/raw_geo/std": 0.14452753394381548, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1203.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 1025.625, "completions/mean_terminated_length": 1025.625, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.9135827165433087, "frac_reward_zero_std": 0.0, "grad_norm": 2.9845077950056416, "kl": 0.01446533203125, "learning_rate": 1.2041338035376022e-07, "loss": -0.0046, "num_tokens": 200190287.0, "reward": 0.0, "reward_std": 0.7751548886299133, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11273907490141118, "rewards/wordcountpos_reward/raw_geo/std": 0.20406165927258288, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 1161.0625, "completions/mean_terminated_length": 1112.6429443359375, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.9137827565513102, "frac_reward_zero_std": 0.0, "grad_norm": 3.007676244862513, "kl": 0.0161590576171875, "learning_rate": 1.203199163837435e-07, "loss": -0.0268, "num_tokens": 200226264.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7679302096366882, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07389850889941517, "rewards/wordcountpos_reward/raw_geo/std": 0.13852856676388314, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1151.5, "completions/mean_terminated_length": 1128.2667236328125, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.9139827965593119, "frac_reward_zero_std": 0.0, "grad_norm": 2.5177985242051957, "kl": 0.0120849609375, "learning_rate": 1.2022666192768673e-07, "loss": -0.0205, "num_tokens": 200266144.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7809904217720032, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10062273310553131, "rewards/wordcountpos_reward/raw_geo/std": 0.18117351632392678, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746356, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 1283.4375, "completions/mean_terminated_length": 1269.0001220703125, "completions/min_length": 1026.0, "completions/min_terminated_length": 1026.0, "epoch": 0.9141828365673135, "frac_reward_zero_std": 0.0, "grad_norm": 3.0711523413888284, "kl": 0.0169830322265625, "learning_rate": 1.2013361703106104e-07, "loss": -0.0272, "num_tokens": 200305271.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6660510897636414, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05328936491960522, "rewards/wordcountpos_reward/raw_geo/std": 0.28116839266601756, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1164283279771532, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 950.0625, "completions/mean_terminated_length": 950.0625, "completions/min_length": 654.0, "completions/min_terminated_length": 654.0, "epoch": 0.914382876575315, "frac_reward_zero_std": 0.0, "grad_norm": 2.906175537244659, "kl": 0.0142974853515625, "learning_rate": 1.2004078173923584e-07, "loss": 0.0004, "num_tokens": 200337832.0, "reward": 0.0, "reward_std": 0.7136956453323364, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0572940232634898, "rewards/wordcountpos_reward/raw_geo/std": 0.06969348256286509, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1319.875, "completions/mean_terminated_length": 1179.77783203125, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.9145829165833167, "frac_reward_zero_std": 0.0, "grad_norm": 2.59373758874467, "kl": 0.0118255615234375, "learning_rate": 1.1994815609747774e-07, "loss": -0.0396, "num_tokens": 200383662.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0447392463684082, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16469734303247444, "rewards/wordcountpos_reward/raw_geo/std": 0.060434908593348595, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1253.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 1072.625, "completions/mean_terminated_length": 1072.625, "completions/min_length": 876.0, "completions/min_terminated_length": 876.0, "epoch": 0.9147829565913183, "frac_reward_zero_std": 0.0, "grad_norm": 3.5887565300544955, "kl": 0.0173492431640625, "learning_rate": 1.1985574015095156e-07, "loss": 0.0103, "num_tokens": 200415552.0, "reward": 1.4901161193847656e-08, "reward_std": 0.979857325553894, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10148864626234877, "rewards/wordcountpos_reward/raw_geo/std": 0.1858000217953469, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1291.4375, "completions/mean_terminated_length": 1129.2222900390625, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.9149829965993198, "frac_reward_zero_std": 0.0, "grad_norm": 3.0462197976939716, "kl": 0.025177001953125, "learning_rate": 1.1976353394471976e-07, "loss": -0.005, "num_tokens": 200470039.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6360238194465637, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07596487521512474, "rewards/wordcountpos_reward/raw_geo/std": 0.09053306943533199, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242312, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1155.8125, "completions/mean_terminated_length": 1155.8125, "completions/min_length": 777.0, "completions/min_terminated_length": 777.0, "epoch": 0.9151830366073215, "frac_reward_zero_std": 0.0, "grad_norm": 3.0968033655248854, "kl": 0.017486572265625, "learning_rate": 1.1967153752374255e-07, "loss": -0.0092, "num_tokens": 200504132.0, "reward": 1.4901161193847656e-08, "reward_std": 0.7907376885414124, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12972931136589516, "rewards/wordcountpos_reward/raw_geo/std": 0.1734125633878029, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08850612031567837, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1241.0, "completions/max_terminated_length": 1241.0, "completions/mean_length": 1016.625, "completions/mean_terminated_length": 1016.625, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.915383076615323, "frac_reward_zero_std": 0.0, "grad_norm": 3.192418014360831, "kl": 0.013092041015625, "learning_rate": 1.195797509328776e-07, "loss": -0.0203, "num_tokens": 200538998.0, "reward": 0.0, "reward_std": 1.0338462591171265, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1309321761811403, "rewards/wordcountpos_reward/raw_geo/std": 0.10039642792942327, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13662601021279466, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 1104.25, "completions/mean_terminated_length": 1104.25, "completions/min_length": 928.0, "completions/min_terminated_length": 928.0, "epoch": 0.9155831166233247, "frac_reward_zero_std": 0.0, "grad_norm": 3.369329285829773, "kl": 0.0189056396484375, "learning_rate": 1.1948817421688066e-07, "loss": -0.0259, "num_tokens": 200578794.0, "reward": 2.9802322387695312e-08, "reward_std": 1.000028133392334, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06707984225275382, "rewards/wordcountpos_reward/raw_geo/std": 0.04226257279408768, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1046.9375, "completions/mean_terminated_length": 1046.9375, "completions/min_length": 784.0, "completions/min_terminated_length": 784.0, "epoch": 0.9157831566313263, "frac_reward_zero_std": 0.0, "grad_norm": 2.7925029001590533, "kl": 0.015514373779296875, "learning_rate": 1.193968074204048e-07, "loss": -0.0001, "num_tokens": 200618009.0, "reward": 0.0, "reward_std": 0.7778029441833496, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.004133903004563955, "rewards/wordcountpos_reward/raw_geo/std": 0.08664537684395404, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1141.8125, "completions/mean_terminated_length": 1059.1539306640625, "completions/min_length": 706.0, "completions/min_terminated_length": 706.0, "epoch": 0.9159831966393278, "frac_reward_zero_std": 0.0, "grad_norm": 2.8725821957446627, "kl": 0.0138092041015625, "learning_rate": 1.193056505880011e-07, "loss": -0.0847, "num_tokens": 200662574.0, "reward": 0.0, "reward_std": 1.0150763988494873, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13216903296277055, "rewards/wordcountpos_reward/raw_geo/std": 0.18432252006460134, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1275.6875, "completions/mean_terminated_length": 1141.0999755859375, "completions/min_length": 884.0, "completions/min_terminated_length": 884.0, "epoch": 0.9161832366473295, "frac_reward_zero_std": 0.5, "grad_norm": 1.9447492124373582, "kl": 0.00977325439453125, "learning_rate": 1.192147037641178e-07, "loss": -0.0015, "num_tokens": 200697921.0, "reward": -2.2351741790771484e-08, "reward_std": 0.7420405745506287, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026595443540113624, "rewards/wordcountpos_reward/raw_geo/std": 0.14628830697043643, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 1.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1350.125, "completions/mean_terminated_length": 1260.2000732421875, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.916383276655331, "frac_reward_zero_std": 0.0, "grad_norm": 2.7788647947115086, "kl": 0.01351165771484375, "learning_rate": 1.1912396699310118e-07, "loss": 0.0257, "num_tokens": 200750835.0, "reward": 3.725290298461914e-09, "reward_std": 1.0594388246536255, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.04821613387203938, "rewards/wordcountpos_reward/raw_geo/std": 0.3626666165713438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 1081.3125, "completions/mean_terminated_length": 1081.3125, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.9165833166633327, "frac_reward_zero_std": 0.0, "grad_norm": 3.351407158622743, "kl": 0.0156097412109375, "learning_rate": 1.1903344031919474e-07, "loss": 0.0236, "num_tokens": 200787880.0, "reward": -4.470348358154297e-08, "reward_std": 0.928205132484436, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08732778464317768, "rewards/wordcountpos_reward/raw_geo/std": 0.09478169622502357, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1231.0, "completions/mean_length": 1047.4375, "completions/mean_terminated_length": 1017.2667236328125, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.9167833566713343, "frac_reward_zero_std": 0.0, "grad_norm": 3.315474262588241, "kl": 0.0181732177734375, "learning_rate": 1.1894312378653987e-07, "loss": -0.0271, "num_tokens": 200825951.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9078887701034546, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0895773284002155, "rewards/wordcountpos_reward/raw_geo/std": 0.08609673021695265, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1199.8125, "completions/mean_terminated_length": 1130.5384521484375, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.9169833966793358, "frac_reward_zero_std": 0.0, "grad_norm": 2.236131607836573, "kl": 0.01009368896484375, "learning_rate": 1.1885301743917517e-07, "loss": -0.0096, "num_tokens": 200866980.0, "reward": 0.0, "reward_std": 0.8866041302680969, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12837804227757024, "rewards/wordcountpos_reward/raw_geo/std": 0.10586593983340593, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1281.375, "completions/mean_terminated_length": 1266.800048828125, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.9171834366873375, "frac_reward_zero_std": 0.0, "grad_norm": 2.90603285210737, "kl": 0.015167236328125, "learning_rate": 1.1876312132103698e-07, "loss": -0.0087, "num_tokens": 200926810.0, "reward": 0.0, "reward_std": 0.9037014245986938, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0050359018943191974, "rewards/wordcountpos_reward/raw_geo/std": 0.09066731297361526, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11979921473804349, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1078.4375, "completions/mean_terminated_length": 1078.4375, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.917383476695339, "frac_reward_zero_std": 0.0, "grad_norm": 3.6996946919673954, "kl": 0.0264892578125, "learning_rate": 1.1867343547595902e-07, "loss": -0.0267, "num_tokens": 200968345.0, "reward": 0.0, "reward_std": 0.6030782461166382, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09457879298124401, "rewards/wordcountpos_reward/raw_geo/std": 0.05925861940849215, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1192.0, "completions/max_terminated_length": 1192.0, "completions/mean_length": 868.8125, "completions/mean_terminated_length": 868.8125, "completions/min_length": 627.0, "completions/min_terminated_length": 627.0, "epoch": 0.9175835167033407, "frac_reward_zero_std": 0.0, "grad_norm": 4.512895041577385, "kl": 0.022186279296875, "learning_rate": 1.1858395994767259e-07, "loss": -0.0209, "num_tokens": 201006806.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8929404020309448, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.029472017670548432, "rewards/wordcountpos_reward/raw_geo/std": 0.2656595488683942, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1402.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1078.125, "completions/mean_terminated_length": 1078.125, "completions/min_length": 901.0, "completions/min_terminated_length": 901.0, "epoch": 0.9177835567113423, "frac_reward_zero_std": 0.0, "grad_norm": 2.7703223101109433, "kl": 0.0140228271484375, "learning_rate": 1.1849469477980624e-07, "loss": -0.0215, "num_tokens": 201047528.0, "reward": 0.0, "reward_std": 0.8650338649749756, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0819401168046744, "rewards/wordcountpos_reward/raw_geo/std": 0.09626368450233638, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1222.8125, "completions/mean_terminated_length": 1158.84619140625, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.9179835967193438, "frac_reward_zero_std": 0.0, "grad_norm": 2.686144560128966, "kl": 0.012054443359375, "learning_rate": 1.1840564001588616e-07, "loss": 0.0083, "num_tokens": 201098893.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8799788355827332, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10612919543361019, "rewards/wordcountpos_reward/raw_geo/std": 0.2437238327727692, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1430.0, "completions/max_terminated_length": 1430.0, "completions/mean_length": 1157.625, "completions/mean_terminated_length": 1157.625, "completions/min_length": 979.0, "completions/min_terminated_length": 979.0, "epoch": 0.9181836367273455, "frac_reward_zero_std": 0.0, "grad_norm": 3.0243025866132283, "kl": 0.0166168212890625, "learning_rate": 1.1831679569933586e-07, "loss": -0.0489, "num_tokens": 201144975.0, "reward": 0.0, "reward_std": 1.0632325410842896, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.010718891415854939, "rewards/wordcountpos_reward/raw_geo/std": 0.12860511402742875, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1075.0625, "completions/mean_terminated_length": 1075.0625, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.918383676735347, "frac_reward_zero_std": 0.0, "grad_norm": 2.361010939044257, "kl": 0.013092041015625, "learning_rate": 1.1822816187347622e-07, "loss": -0.0257, "num_tokens": 201182672.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6852699518203735, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1205814253027778, "rewards/wordcountpos_reward/raw_geo/std": 0.2683758863044138, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09179284245476838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1303.8125, "completions/mean_terminated_length": 1186.0999755859375, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.9185837167433487, "frac_reward_zero_std": 0.0, "grad_norm": 2.641316254817193, "kl": 0.0125579833984375, "learning_rate": 1.1813973858152541e-07, "loss": 0.036, "num_tokens": 201234077.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9407966136932373, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.045537558506659734, "rewards/wordcountpos_reward/raw_geo/std": 0.15192245465384346, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1313.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1073.5625, "completions/mean_terminated_length": 1073.5625, "completions/min_length": 967.0, "completions/min_terminated_length": 967.0, "epoch": 0.9187837567513503, "frac_reward_zero_std": 0.0, "grad_norm": 2.579444307942816, "kl": 0.01259613037109375, "learning_rate": 1.1805152586659917e-07, "loss": -0.0116, "num_tokens": 201267598.0, "reward": 7.450580596923828e-09, "reward_std": 1.032848834991455, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.04000143990077469, "rewards/wordcountpos_reward/raw_geo/std": 0.08774723667069098, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 938.875, "completions/mean_terminated_length": 938.875, "completions/min_length": 634.0, "completions/min_terminated_length": 634.0, "epoch": 0.9189837967593518, "frac_reward_zero_std": 0.0, "grad_norm": 3.78105012211775, "kl": 0.0164031982421875, "learning_rate": 1.1796352377171033e-07, "loss": -0.0229, "num_tokens": 201306084.0, "reward": 0.0, "reward_std": 0.721058189868927, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10439054848938181, "rewards/wordcountpos_reward/raw_geo/std": 0.136446752973275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11666666666666668, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1176.0, "completions/mean_length": 1154.6875, "completions/mean_terminated_length": 1039.5833740234375, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.9191838367673535, "frac_reward_zero_std": 0.0, "grad_norm": 2.8099770688253787, "kl": 0.01788330078125, "learning_rate": 1.1787573233976912e-07, "loss": -0.0697, "num_tokens": 201354911.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0183337926864624, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.037924586309986, "rewards/wordcountpos_reward/raw_geo/std": 0.06356282743922685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1047.875, "completions/mean_terminated_length": 1047.875, "completions/min_length": 756.0, "completions/min_terminated_length": 756.0, "epoch": 0.919383876775355, "frac_reward_zero_std": 0.0, "grad_norm": 3.2263938613326597, "kl": 0.0162353515625, "learning_rate": 1.1778815161358311e-07, "loss": -0.0212, "num_tokens": 201392813.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9527865052223206, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.08764326918407382, "rewards/wordcountpos_reward/raw_geo/std": 0.09903744041119932, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1159.0, "completions/mean_length": 975.625, "completions/mean_terminated_length": 940.6666870117188, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.9195839167833567, "frac_reward_zero_std": 0.0, "grad_norm": 3.4557494247938743, "kl": 0.01983642578125, "learning_rate": 1.17700781635857e-07, "loss": 0.0093, "num_tokens": 201430687.0, "reward": 0.0, "reward_std": 0.7818318605422974, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03322554319301665, "rewards/wordcountpos_reward/raw_geo/std": 0.03578332531485256, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195009, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1159.3125, "completions/mean_terminated_length": 1110.6429443359375, "completions/min_length": 702.0, "completions/min_terminated_length": 702.0, "epoch": 0.9197839567913583, "frac_reward_zero_std": 0.0, "grad_norm": 3.1572341206734706, "kl": 0.017364501953125, "learning_rate": 1.1761362244919276e-07, "loss": 0.0591, "num_tokens": 201467100.0, "reward": 0.0, "reward_std": 0.5503443479537964, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.24261977459900133, "rewards/wordcountpos_reward/raw_geo/std": 0.09585374302015144, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 1040.1875, "completions/mean_terminated_length": 1040.1875, "completions/min_length": 852.0, "completions/min_terminated_length": 852.0, "epoch": 0.9199839967993598, "frac_reward_zero_std": 0.0, "grad_norm": 2.9817760065936856, "kl": 0.018310546875, "learning_rate": 1.1752667409608965e-07, "loss": -0.0373, "num_tokens": 201507327.0, "reward": 0.0, "reward_std": 0.44107353687286377, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04656905868706116, "rewards/wordcountpos_reward/raw_geo/std": 0.3299565968519596, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09259629622222518, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1146.8125, "completions/mean_terminated_length": 1123.2667236328125, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.9201840368073615, "frac_reward_zero_std": 0.0, "grad_norm": 2.763207925531254, "kl": 0.015716552734375, "learning_rate": 1.1743993661894404e-07, "loss": 0.006, "num_tokens": 201542924.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6804339289665222, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008609311799404192, "rewards/wordcountpos_reward/raw_geo/std": 0.039079272618868295, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10318986456114838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1330.0, "completions/max_terminated_length": 1330.0, "completions/mean_length": 1105.0625, "completions/mean_terminated_length": 1105.0625, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.920384076815363, "frac_reward_zero_std": 0.0, "grad_norm": 3.8265312603595043, "kl": 0.02069091796875, "learning_rate": 1.1735341006004964e-07, "loss": 0.0055, "num_tokens": 201593101.0, "reward": 0.0, "reward_std": 1.0394377708435059, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.026175421899040513, "rewards/wordcountpos_reward/raw_geo/std": 0.1434372868133961, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12224747213928168, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 1276.8125, "completions/mean_terminated_length": 1175.3636474609375, "completions/min_length": 1073.0, "completions/min_terminated_length": 1073.0, "epoch": 0.9205841168233647, "frac_reward_zero_std": 0.0, "grad_norm": 2.4270700667315337, "kl": 0.00986480712890625, "learning_rate": 1.1726709446159701e-07, "loss": 0.0127, "num_tokens": 201640426.0, "reward": 0.0, "reward_std": 0.7299476265907288, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02871584038057125, "rewards/wordcountpos_reward/raw_geo/std": 0.062401103285123764, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11792967144619461, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 959.9375, "completions/mean_terminated_length": 882.7857666015625, "completions/min_length": 545.0, "completions/min_terminated_length": 545.0, "epoch": 0.9207841568313663, "frac_reward_zero_std": 0.0, "grad_norm": 3.2976257198201835, "kl": 0.015411376953125, "learning_rate": 1.1718098986567413e-07, "loss": 0.0358, "num_tokens": 201679161.0, "reward": 0.0, "reward_std": 0.7358053922653198, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06030125561770057, "rewards/wordcountpos_reward/raw_geo/std": 0.0593507368033223, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1352.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1065.9375, "completions/mean_terminated_length": 1065.9375, "completions/min_length": 594.0, "completions/min_terminated_length": 594.0, "epoch": 0.9209841968393678, "frac_reward_zero_std": 0.0, "grad_norm": 2.1785764207475116, "kl": 0.0112457275390625, "learning_rate": 1.1709509631426599e-07, "loss": 0.0102, "num_tokens": 201711456.0, "reward": 2.60770320892334e-08, "reward_std": 1.0619239807128906, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12453381882699853, "rewards/wordcountpos_reward/raw_geo/std": 0.0900602081367177, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 1132.375, "completions/mean_terminated_length": 1107.86669921875, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.9211842368473695, "frac_reward_zero_std": 0.0, "grad_norm": 3.2259859995907476, "kl": 0.015716552734375, "learning_rate": 1.1700941384925474e-07, "loss": -0.0077, "num_tokens": 201750062.0, "reward": 0.0, "reward_std": 1.0238144397735596, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09323843678939367, "rewards/wordcountpos_reward/raw_geo/std": 0.08034364923993258, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.10036968702787748, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1134.5625, "completions/mean_terminated_length": 968.45458984375, "completions/min_length": 658.0, "completions/min_terminated_length": 658.0, "epoch": 0.921384276855371, "frac_reward_zero_std": 0.0, "grad_norm": 3.128105729461136, "kl": 0.0191650390625, "learning_rate": 1.1692394251241937e-07, "loss": -0.0324, "num_tokens": 201792711.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8747562170028687, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0532166546385638, "rewards/wordcountpos_reward/raw_geo/std": 0.08745505706984047, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 1097.6875, "completions/mean_terminated_length": 1097.6875, "completions/min_length": 674.0, "completions/min_terminated_length": 674.0, "epoch": 0.9215843168633727, "frac_reward_zero_std": 0.0, "grad_norm": 3.4827331291790538, "kl": 0.019317626953125, "learning_rate": 1.168386823454362e-07, "loss": -0.0242, "num_tokens": 201830010.0, "reward": 0.0, "reward_std": 0.8342228531837463, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0767819881484103, "rewards/wordcountpos_reward/raw_geo/std": 0.13346172073012885, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1334.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 1074.5, "completions/mean_terminated_length": 1074.5, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.9217843568713743, "frac_reward_zero_std": 0.0, "grad_norm": 2.838677380871666, "kl": 0.0161285400390625, "learning_rate": 1.1675363338987849e-07, "loss": -0.018, "num_tokens": 201880338.0, "reward": 1.4901161193847656e-08, "reward_std": 1.018130898475647, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.042820428452592985, "rewards/wordcountpos_reward/raw_geo/std": 0.20653031976182115, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1127.0, "completions/max_terminated_length": 1127.0, "completions/mean_length": 999.9375, "completions/mean_terminated_length": 999.9375, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.9219843968793758, "frac_reward_zero_std": 0.0, "grad_norm": 2.9310480852085825, "kl": 0.0132598876953125, "learning_rate": 1.1666879568721649e-07, "loss": -0.0372, "num_tokens": 201910777.0, "reward": 7.450580596923828e-09, "reward_std": 1.015517234802246, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.13901987729604617, "rewards/wordcountpos_reward/raw_geo/std": 0.06876905622269806, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1169.0625, "completions/mean_terminated_length": 1147.0, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.9221844368873775, "frac_reward_zero_std": 0.0, "grad_norm": 3.1871800885589803, "kl": 0.0131683349609375, "learning_rate": 1.1658416927881734e-07, "loss": -0.0617, "num_tokens": 201956322.0, "reward": 0.0, "reward_std": 0.9854879379272461, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.026328292513005704, "rewards/wordcountpos_reward/raw_geo/std": 0.06006990428682475, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746353, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1414.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1083.6875, "completions/mean_terminated_length": 1083.6875, "completions/min_length": 828.0, "completions/min_terminated_length": 828.0, "epoch": 0.9223844768953791, "frac_reward_zero_std": 0.0, "grad_norm": 2.7733846884935924, "kl": 0.016143798828125, "learning_rate": 1.1649975420594547e-07, "loss": 0.0086, "num_tokens": 201991301.0, "reward": 0.0, "reward_std": 0.7682340145111084, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05585934346609969, "rewards/wordcountpos_reward/raw_geo/std": 0.09013322286615244, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03415650255319865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 974.875, "completions/mean_terminated_length": 974.875, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.9225845169033807, "frac_reward_zero_std": 0.0, "grad_norm": 3.533030675613623, "kl": 0.021942138671875, "learning_rate": 1.1641555050976184e-07, "loss": 0.0353, "num_tokens": 202034883.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9242318868637085, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19261770641181417, "rewards/wordcountpos_reward/raw_geo/std": 0.22083475488878437, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 1127.75, "completions/mean_terminated_length": 1041.84619140625, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.9227845569113823, "frac_reward_zero_std": 0.0, "grad_norm": 2.9107274323867123, "kl": 0.0143585205078125, "learning_rate": 1.1633155823132473e-07, "loss": -0.0073, "num_tokens": 202086151.0, "reward": 0.0, "reward_std": 0.7808202505111694, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06505362084610868, "rewards/wordcountpos_reward/raw_geo/std": 0.22797276212043918, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1386.6875, "completions/mean_terminated_length": 1298.5555419921875, "completions/min_length": 1100.0, "completions/min_terminated_length": 1100.0, "epoch": 0.9229845969193838, "frac_reward_zero_std": 0.0, "grad_norm": 2.4372787510861453, "kl": 0.00934600830078125, "learning_rate": 1.1624777741158897e-07, "loss": 0.0127, "num_tokens": 202134554.0, "reward": 0.0, "reward_std": 0.8294680118560791, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04840449803205141, "rewards/wordcountpos_reward/raw_geo/std": 0.08882352755652513, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1346.0, "completions/max_terminated_length": 1346.0, "completions/mean_length": 1036.75, "completions/mean_terminated_length": 1036.75, "completions/min_length": 656.0, "completions/min_terminated_length": 656.0, "epoch": 0.9231846369273855, "frac_reward_zero_std": 0.0, "grad_norm": 2.803110891190344, "kl": 0.0131072998046875, "learning_rate": 1.1616420809140668e-07, "loss": -0.023, "num_tokens": 202179550.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7340359687805176, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03783732651874786, "rewards/wordcountpos_reward/raw_geo/std": 0.05409960566703108, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09108400680852978, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1320.3125, "completions/mean_terminated_length": 1180.5555419921875, "completions/min_length": 1031.0, "completions/min_terminated_length": 1031.0, "epoch": 0.9233846769353871, "frac_reward_zero_std": 0.0, "grad_norm": 2.3332396771281165, "kl": 0.011077880859375, "learning_rate": 1.1608085031152647e-07, "loss": 0.0067, "num_tokens": 202228051.0, "reward": -7.450580596923828e-09, "reward_std": 1.0609760284423828, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.43684518603356576, "rewards/wordcountpos_reward/raw_geo/std": 0.12613426166800035, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 1031.1875, "completions/mean_terminated_length": 999.9334106445312, "completions/min_length": 765.0, "completions/min_terminated_length": 765.0, "epoch": 0.9235847169433887, "frac_reward_zero_std": 0.0, "grad_norm": 3.4675298379447894, "kl": 0.018951416015625, "learning_rate": 1.1599770411259401e-07, "loss": -0.0582, "num_tokens": 202267222.0, "reward": 1.4901161193847656e-08, "reward_std": 1.027290940284729, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03976654714553647, "rewards/wordcountpos_reward/raw_geo/std": 0.1026569957298017, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1123.25, "completions/mean_terminated_length": 952.0, "completions/min_length": 739.0, "completions/min_terminated_length": 739.0, "epoch": 0.9237847569513903, "frac_reward_zero_std": 0.0, "grad_norm": 3.283528177062966, "kl": 0.0198516845703125, "learning_rate": 1.1591476953515179e-07, "loss": 0.0324, "num_tokens": 202319610.0, "reward": 0.0, "reward_std": 0.9899692535400391, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07228777244768064, "rewards/wordcountpos_reward/raw_geo/std": 0.19685638194921865, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.03191423692521126, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1070.75, "completions/mean_terminated_length": 1070.75, "completions/min_length": 687.0, "completions/min_terminated_length": 687.0, "epoch": 0.9239847969593918, "frac_reward_zero_std": 0.0, "grad_norm": 3.5639530474294414, "kl": 0.018646240234375, "learning_rate": 1.1583204661963915e-07, "loss": 0.0447, "num_tokens": 202368518.0, "reward": 0.0, "reward_std": 0.9454617500305176, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07260981932668083, "rewards/wordcountpos_reward/raw_geo/std": 0.06999243401727114, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1103.75, "completions/mean_terminated_length": 1103.75, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.9241848369673935, "frac_reward_zero_std": 0.0, "grad_norm": 3.475244439056834, "kl": 0.02069091796875, "learning_rate": 1.1574953540639208e-07, "loss": 0.0033, "num_tokens": 202415098.0, "reward": 0.0, "reward_std": 0.7272059321403503, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.23772171527314526, "rewards/wordcountpos_reward/raw_geo/std": 0.5409717327538065, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457553, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1290.0, "completions/max_terminated_length": 1290.0, "completions/mean_length": 1125.8125, "completions/mean_terminated_length": 1125.8125, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.9243848769753951, "frac_reward_zero_std": 0.0, "grad_norm": 2.351960089519299, "kl": 0.01318359375, "learning_rate": 1.156672359356434e-07, "loss": -0.0245, "num_tokens": 202455087.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9889339208602905, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03423577510772599, "rewards/wordcountpos_reward/raw_geo/std": 0.0672047723740231, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1133.1875, "completions/mean_terminated_length": 1108.7333984375, "completions/min_length": 943.0, "completions/min_terminated_length": 943.0, "epoch": 0.9245849169833967, "frac_reward_zero_std": 0.0, "grad_norm": 2.371196232585918, "kl": 0.0100860595703125, "learning_rate": 1.155851482475228e-07, "loss": 0.044, "num_tokens": 202502146.0, "reward": 0.0, "reward_std": 0.588176965713501, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09338000243156196, "rewards/wordcountpos_reward/raw_geo/std": 0.06761451459368342, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 990.0625, "completions/mean_terminated_length": 990.0625, "completions/min_length": 646.0, "completions/min_terminated_length": 646.0, "epoch": 0.9247849569913983, "frac_reward_zero_std": 0.0, "grad_norm": 3.601499624704347, "kl": 0.018280029296875, "learning_rate": 1.1550327238205645e-07, "loss": -0.0454, "num_tokens": 202531131.0, "reward": 0.0, "reward_std": 0.9371521472930908, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.006226510913901601, "rewards/wordcountpos_reward/raw_geo/std": 0.05206026403832367, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10174405069512345, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 896.625, "completions/mean_terminated_length": 896.625, "completions/min_length": 667.0, "completions/min_terminated_length": 667.0, "epoch": 0.9249849969993998, "frac_reward_zero_std": 0.0, "grad_norm": 3.844296152733578, "kl": 0.020538330078125, "learning_rate": 1.1542160837916754e-07, "loss": -0.0206, "num_tokens": 202562853.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0181255340576172, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1609076212597846, "rewards/wordcountpos_reward/raw_geo/std": 0.05328044449660982, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1234.5, "completions/mean_terminated_length": 1196.571533203125, "completions/min_length": 1058.0, "completions/min_terminated_length": 1058.0, "epoch": 0.9251850370074015, "frac_reward_zero_std": 0.0, "grad_norm": 3.1723425012179094, "kl": 0.01739501953125, "learning_rate": 1.1534015627867577e-07, "loss": 0.0167, "num_tokens": 202615333.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0667002201080322, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0008687591743895605, "rewards/wordcountpos_reward/raw_geo/std": 0.08429973574871405, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1204.0, "completions/max_terminated_length": 1204.0, "completions/mean_length": 935.125, "completions/mean_terminated_length": 935.125, "completions/min_length": 643.0, "completions/min_terminated_length": 643.0, "epoch": 0.9253850770154031, "frac_reward_zero_std": 0.0, "grad_norm": 3.7637871414487623, "kl": 0.01873779296875, "learning_rate": 1.1525891612029762e-07, "loss": -0.0199, "num_tokens": 202642863.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7164758443832397, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13114880477696308, "rewards/wordcountpos_reward/raw_geo/std": 0.15675524557651718, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1362.0, "completions/mean_length": 1205.6875, "completions/mean_terminated_length": 1107.5833740234375, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.9255851170234047, "frac_reward_zero_std": 0.0, "grad_norm": 2.4062983871354677, "kl": 0.0137481689453125, "learning_rate": 1.1517788794364596e-07, "loss": 0.0268, "num_tokens": 202690730.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0658891201019287, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0018813869254648476, "rewards/wordcountpos_reward/raw_geo/std": 0.120618319523178, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 1225.125, "completions/mean_terminated_length": 1100.181884765625, "completions/min_length": 951.0, "completions/min_terminated_length": 951.0, "epoch": 0.9257851570314063, "frac_reward_zero_std": 0.0, "grad_norm": 2.234798340904426, "kl": 0.0106353759765625, "learning_rate": 1.150970717882307e-07, "loss": -0.0275, "num_tokens": 202740884.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6074829697608948, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.13977426160668696, "rewards/wordcountpos_reward/raw_geo/std": 0.21264535051380754, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1288.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 1057.25, "completions/mean_terminated_length": 1057.25, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.9259851970394078, "frac_reward_zero_std": 0.0, "grad_norm": 3.3051075683703597, "kl": 0.020751953125, "learning_rate": 1.1501646769345805e-07, "loss": -0.0269, "num_tokens": 202781608.0, "reward": -5.960464477539063e-08, "reward_std": 0.5945137739181519, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0550398893227672, "rewards/wordcountpos_reward/raw_geo/std": 0.16412012443840737, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0807373427759331, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 953.0, "completions/mean_terminated_length": 953.0, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.9261852370474095, "frac_reward_zero_std": 0.0, "grad_norm": 3.6027616292231626, "kl": 0.01483154296875, "learning_rate": 1.1493607569863104e-07, "loss": -0.0884, "num_tokens": 202807656.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8082252740859985, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03868311822489188, "rewards/wordcountpos_reward/raw_geo/std": 0.06349764594356595, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14907119849998599, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1395.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 944.125, "completions/mean_terminated_length": 944.125, "completions/min_length": 721.0, "completions/min_terminated_length": 721.0, "epoch": 0.9263852770554111, "frac_reward_zero_std": 0.0, "grad_norm": 3.4395259832917913, "kl": 0.0168914794921875, "learning_rate": 1.1485589584294903e-07, "loss": 0.0041, "num_tokens": 202848530.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9914979338645935, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11081661281611331, "rewards/wordcountpos_reward/raw_geo/std": 0.0772457656064107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 1037.4375, "completions/mean_terminated_length": 1037.4375, "completions/min_length": 836.0, "completions/min_terminated_length": 836.0, "epoch": 0.9265853170634126, "frac_reward_zero_std": 0.0, "grad_norm": 3.0591277527921337, "kl": 0.0144500732421875, "learning_rate": 1.1477592816550817e-07, "loss": -0.019, "num_tokens": 202883257.0, "reward": 4.470348358154297e-08, "reward_std": 0.9685467481613159, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.025843880165932247, "rewards/wordcountpos_reward/raw_geo/std": 0.08454641858988407, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1214.0, "completions/mean_length": 1048.4375, "completions/mean_terminated_length": 1018.3333740234375, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.9267853570714143, "frac_reward_zero_std": 0.0, "grad_norm": 3.7851861513156853, "kl": 0.02008056640625, "learning_rate": 1.1469617270530107e-07, "loss": 0.0347, "num_tokens": 202914080.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5364655256271362, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11553698459077362, "rewards/wordcountpos_reward/raw_geo/std": 0.22346811158301663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1110.1875, "completions/mean_terminated_length": 1084.2000732421875, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.9269853970794159, "frac_reward_zero_std": 0.0, "grad_norm": 3.4861704323767673, "kl": 0.020751953125, "learning_rate": 1.1461662950121683e-07, "loss": -0.0529, "num_tokens": 202964675.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9913690090179443, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21604635152282034, "rewards/wordcountpos_reward/raw_geo/std": 0.1737460006443912, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1244.875, "completions/mean_terminated_length": 1227.86669921875, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.9271854370874175, "frac_reward_zero_std": 0.0, "grad_norm": 3.1487000154454643, "kl": 0.018890380859375, "learning_rate": 1.1453729859204106e-07, "loss": -0.0121, "num_tokens": 203009905.0, "reward": 9.313225746154785e-09, "reward_std": 1.0440714359283447, "rewards/wordcountpos_reward/mean": 9.313225746154785e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.06688716551917907, "rewards/wordcountpos_reward/raw_geo/std": 0.093752007317238, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1334.0, "completions/mean_terminated_length": 1295.6923828125, "completions/min_length": 1164.0, "completions/min_terminated_length": 1164.0, "epoch": 0.9273854770954191, "frac_reward_zero_std": 0.0, "grad_norm": 2.7210380088624655, "kl": 0.0121002197265625, "learning_rate": 1.1445818001645587e-07, "loss": 0.0183, "num_tokens": 203054673.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9133732318878174, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.038844830948348195, "rewards/wordcountpos_reward/raw_geo/std": 0.0715480842200092, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 1055.25, "completions/mean_terminated_length": 1055.25, "completions/min_length": 548.0, "completions/min_terminated_length": 548.0, "epoch": 0.9275855171034206, "frac_reward_zero_std": 0.0, "grad_norm": 3.3233930211609213, "kl": 0.0181884765625, "learning_rate": 1.1437927381303987e-07, "loss": -0.0423, "num_tokens": 203101101.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0019429922103882, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.18737944071715684, "rewards/wordcountpos_reward/raw_geo/std": 0.03010261075357618, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.05426273532033234, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1054.25, "completions/mean_terminated_length": 1054.25, "completions/min_length": 791.0, "completions/min_terminated_length": 791.0, "epoch": 0.9277855571114223, "frac_reward_zero_std": 0.0, "grad_norm": 2.866332868887854, "kl": 0.0150604248046875, "learning_rate": 1.1430058002026813e-07, "loss": 0.0339, "num_tokens": 203138681.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0098910331726074, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17378556596667608, "rewards/wordcountpos_reward/raw_geo/std": 0.1242712949452841, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1298.9375, "completions/mean_terminated_length": 1285.533447265625, "completions/min_length": 1166.0, "completions/min_terminated_length": 1166.0, "epoch": 0.9279855971194239, "frac_reward_zero_std": 0.0, "grad_norm": 2.581335595010073, "kl": 0.013702392578125, "learning_rate": 1.1422209867651197e-07, "loss": -0.0349, "num_tokens": 203183440.0, "reward": 0.0, "reward_std": 1.0226911306381226, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.27681866682912604, "rewards/wordcountpos_reward/raw_geo/std": 0.23821254754049218, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639736, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 1255.6875, "completions/mean_terminated_length": 1255.6875, "completions/min_length": 1020.0, "completions/min_terminated_length": 1020.0, "epoch": 0.9281856371274255, "frac_reward_zero_std": 0.0, "grad_norm": 3.3642387637135536, "kl": 0.0166778564453125, "learning_rate": 1.1414382982003937e-07, "loss": 0.0212, "num_tokens": 203222811.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0639452934265137, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.20547929827020908, "rewards/wordcountpos_reward/raw_geo/std": 0.08887291247937433, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 841.375, "completions/mean_terminated_length": 841.375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.9283856771354271, "frac_reward_zero_std": 0.0, "grad_norm": 4.047559278072514, "kl": 0.01922607421875, "learning_rate": 1.140657734890145e-07, "loss": -0.221, "num_tokens": 203263681.0, "reward": 0.0, "reward_std": 0.922298014163971, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.022644237341602928, "rewards/wordcountpos_reward/raw_geo/std": 0.10381040996750311, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.19925788241297684, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1305.0, "completions/max_terminated_length": 1305.0, "completions/mean_length": 994.0625, "completions/mean_terminated_length": 994.0625, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.9285857171434286, "frac_reward_zero_std": 0.0, "grad_norm": 3.939788511939653, "kl": 0.02362060546875, "learning_rate": 1.1398792972149804e-07, "loss": 0.0039, "num_tokens": 203301682.0, "reward": -5.21540641784668e-08, "reward_std": 1.0230185985565186, "rewards/wordcountpos_reward/mean": -5.21540641784668e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.014949612520901354, "rewards/wordcountpos_reward/raw_geo/std": 0.10967982694951493, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1203.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 908.0625, "completions/mean_terminated_length": 908.0625, "completions/min_length": 553.0, "completions/min_terminated_length": 553.0, "epoch": 0.9287857571514303, "frac_reward_zero_std": 0.0, "grad_norm": 3.934179124219554, "kl": 0.0186767578125, "learning_rate": 1.1391029855544687e-07, "loss": -0.0669, "num_tokens": 203333003.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9172544479370117, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1247270977525648, "rewards/wordcountpos_reward/raw_geo/std": 0.05671860167724538, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12049281521534186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1227.0, "completions/max_terminated_length": 1227.0, "completions/mean_length": 1088.6875, "completions/mean_terminated_length": 1088.6875, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.9289857971594319, "frac_reward_zero_std": 0.0, "grad_norm": 2.2582293342050135, "kl": 0.0140380859375, "learning_rate": 1.138328800287145e-07, "loss": -0.0253, "num_tokens": 203372462.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5279631614685059, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1217005954679705, "rewards/wordcountpos_reward/raw_geo/std": 0.24609983914106748, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1259.25, "completions/mean_terminated_length": 1179.0, "completions/min_length": 930.0, "completions/min_terminated_length": 930.0, "epoch": 0.9291858371674335, "frac_reward_zero_std": 0.0, "grad_norm": 2.8398712917914155, "kl": 0.01605224609375, "learning_rate": 1.1375567417905027e-07, "loss": -0.0003, "num_tokens": 203424746.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9876142740249634, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.054087517534289885, "rewards/wordcountpos_reward/raw_geo/std": 0.1923054866623811, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.12931443160847217, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1100.5, "completions/mean_terminated_length": 1100.5, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.9293858771754351, "frac_reward_zero_std": 0.0, "grad_norm": 2.926172742094113, "kl": 0.0188446044921875, "learning_rate": 1.1367868104410035e-07, "loss": -0.0156, "num_tokens": 203464170.0, "reward": 0.0, "reward_std": 0.8326346278190613, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09750968881506196, "rewards/wordcountpos_reward/raw_geo/std": 0.29781639216635564, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1161.9375, "completions/mean_terminated_length": 1161.9375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.9295859171834366, "frac_reward_zero_std": 0.0, "grad_norm": 3.193846600176886, "kl": 0.0170135498046875, "learning_rate": 1.1360190066140678e-07, "loss": -0.0111, "num_tokens": 203502345.0, "reward": 0.0, "reward_std": 0.6843069195747375, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0673484737816687, "rewards/wordcountpos_reward/raw_geo/std": 0.08262855078166872, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.029502040105226113, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1235.875, "completions/mean_terminated_length": 1174.923095703125, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.9297859571914383, "frac_reward_zero_std": 0.0, "grad_norm": 2.9439218277798793, "kl": 0.0166473388671875, "learning_rate": 1.1352533306840818e-07, "loss": -0.0573, "num_tokens": 203549631.0, "reward": 0.0, "reward_std": 0.5490013360977173, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09702399361524222, "rewards/wordcountpos_reward/raw_geo/std": 0.13100637210575752, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.15104573749303493, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1392.0, "completions/mean_length": 1271.25, "completions/mean_terminated_length": 1195.0, "completions/min_length": 553.0, "completions/min_terminated_length": 553.0, "epoch": 0.9299859971994399, "frac_reward_zero_std": 0.0, "grad_norm": 2.456297133053474, "kl": 0.014678955078125, "learning_rate": 1.1344897830243907e-07, "loss": -0.0464, "num_tokens": 203604595.0, "reward": 0.0, "reward_std": 0.7775891423225403, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.010844023636786123, "rewards/wordcountpos_reward/raw_geo/std": 0.10299829986204055, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 1298.3125, "completions/mean_terminated_length": 1298.3125, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.9301860372074415, "frac_reward_zero_std": 0.0, "grad_norm": 2.9124700819645875, "kl": 0.0137176513671875, "learning_rate": 1.1337283640073054e-07, "loss": 0.0052, "num_tokens": 203647384.0, "reward": 2.2351741790771484e-08, "reward_std": 1.064143180847168, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1114293526730957, "rewards/wordcountpos_reward/raw_geo/std": 0.08944007274017969, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1210.125, "completions/mean_terminated_length": 1168.71435546875, "completions/min_length": 690.0, "completions/min_terminated_length": 690.0, "epoch": 0.9303860772154431, "frac_reward_zero_std": 0.0, "grad_norm": 2.7429071121561126, "kl": 0.0126953125, "learning_rate": 1.1329690740040963e-07, "loss": -0.1038, "num_tokens": 203701218.0, "reward": 0.0, "reward_std": 0.9425488710403442, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06563348549536, "rewards/wordcountpos_reward/raw_geo/std": 0.07494142361740748, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437976, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1108.0, "completions/max_terminated_length": 1108.0, "completions/mean_length": 931.8125, "completions/mean_terminated_length": 931.8125, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.9305861172234446, "frac_reward_zero_std": 0.0, "grad_norm": 3.2313375219321356, "kl": 0.0146636962890625, "learning_rate": 1.1322119133849972e-07, "loss": -0.0273, "num_tokens": 203740471.0, "reward": 0.0, "reward_std": 0.9758516550064087, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10021897237729487, "rewards/wordcountpos_reward/raw_geo/std": 0.054340852636620855, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12382783747337807, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1247.6875, "completions/mean_terminated_length": 1211.6429443359375, "completions/min_length": 1064.0, "completions/min_terminated_length": 1064.0, "epoch": 0.9307861572314463, "frac_reward_zero_std": 0.0, "grad_norm": 3.3758554372441663, "kl": 0.0172271728515625, "learning_rate": 1.131456882519203e-07, "loss": -0.0137, "num_tokens": 203792066.0, "reward": 0.0, "reward_std": 0.8561545610427856, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.023404710619681607, "rewards/wordcountpos_reward/raw_geo/std": 0.07740248083768529, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1466.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1215.125, "completions/mean_terminated_length": 1215.125, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.9309861972394479, "frac_reward_zero_std": 0.0, "grad_norm": 3.172345044985368, "kl": 0.02093505859375, "learning_rate": 1.1307039817748699e-07, "loss": -0.0203, "num_tokens": 203832156.0, "reward": 0.0, "reward_std": 1.004351258277893, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04534427221987374, "rewards/wordcountpos_reward/raw_geo/std": 0.09774208017809975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1327.375, "completions/mean_terminated_length": 1248.9091796875, "completions/min_length": 788.0, "completions/min_terminated_length": 788.0, "epoch": 0.9311862372474495, "frac_reward_zero_std": 0.0, "grad_norm": 3.325796829397579, "kl": 0.017547607421875, "learning_rate": 1.1299532115191152e-07, "loss": 0.0253, "num_tokens": 203877058.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0001996755599976, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05048554926398381, "rewards/wordcountpos_reward/raw_geo/std": 0.09500055606987162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1271.0, "completions/mean_length": 1156.4375, "completions/mean_terminated_length": 1107.357177734375, "completions/min_length": 945.0, "completions/min_terminated_length": 945.0, "epoch": 0.9313862772554511, "frac_reward_zero_std": 0.0, "grad_norm": 3.23361907716798, "kl": 0.0149078369140625, "learning_rate": 1.1292045721180194e-07, "loss": 0.0153, "num_tokens": 203915841.0, "reward": -3.725290298461914e-09, "reward_std": 1.0620183944702148, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.0842753027311131, "rewards/wordcountpos_reward/raw_geo/std": 0.0772851006756475, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1491.0, "completions/max_terminated_length": 1491.0, "completions/mean_length": 1158.125, "completions/mean_terminated_length": 1158.125, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.9315863172634526, "frac_reward_zero_std": 0.0, "grad_norm": 2.9509220897882846, "kl": 0.0152435302734375, "learning_rate": 1.1284580639366212e-07, "loss": -0.0168, "num_tokens": 203965987.0, "reward": 0.0, "reward_std": 0.567800760269165, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.16173861737605227, "rewards/wordcountpos_reward/raw_geo/std": 0.10240667888014307, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333334, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 1308.5625, "completions/mean_terminated_length": 1281.21435546875, "completions/min_length": 974.0, "completions/min_terminated_length": 974.0, "epoch": 0.9317863572714543, "frac_reward_zero_std": 0.0, "grad_norm": 3.147348914098359, "kl": 0.0157318115234375, "learning_rate": 1.1277136873389215e-07, "loss": -0.0208, "num_tokens": 204011132.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0344572067260742, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.037327148859003616, "rewards/wordcountpos_reward/raw_geo/std": 0.05720004869739846, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820635, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1393.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1089.125, "completions/mean_terminated_length": 1089.125, "completions/min_length": 888.0, "completions/min_terminated_length": 888.0, "epoch": 0.9319863972794559, "frac_reward_zero_std": 0.0, "grad_norm": 3.104838744987011, "kl": 0.0171661376953125, "learning_rate": 1.1269714426878821e-07, "loss": -0.0456, "num_tokens": 204056062.0, "reward": 0.0, "reward_std": 0.957156777381897, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1300816516065738, "rewards/wordcountpos_reward/raw_geo/std": 0.10383802940188491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1471.0, "completions/mean_length": 1189.3125, "completions/mean_terminated_length": 1144.9285888671875, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.9321864372874575, "frac_reward_zero_std": 0.0, "grad_norm": 3.031696113690883, "kl": 0.018524169921875, "learning_rate": 1.1262313303454254e-07, "loss": -0.0688, "num_tokens": 204102091.0, "reward": 0.0, "reward_std": 0.3497549295425415, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08949824643781196, "rewards/wordcountpos_reward/raw_geo/std": 0.1779411171921043, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11155467020454342, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1403.0, "completions/mean_length": 1090.6875, "completions/mean_terminated_length": 1063.4000244140625, "completions/min_length": 883.0, "completions/min_terminated_length": 883.0, "epoch": 0.9323864772954591, "frac_reward_zero_std": 0.0, "grad_norm": 2.799941104996639, "kl": 0.0136566162109375, "learning_rate": 1.1254933506724323e-07, "loss": -0.0058, "num_tokens": 204146446.0, "reward": 5.21540641784668e-08, "reward_std": 1.0501623153686523, "rewards/wordcountpos_reward/mean": 5.21540641784668e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.3023834459034345, "rewards/wordcountpos_reward/raw_geo/std": 0.1462773976935057, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460886, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 1110.5, "completions/mean_terminated_length": 1110.5, "completions/min_length": 989.0, "completions/min_terminated_length": 989.0, "epoch": 0.9325865173034606, "frac_reward_zero_std": 0.0, "grad_norm": 3.172012175923581, "kl": 0.0148468017578125, "learning_rate": 1.1247575040287457e-07, "loss": -0.0104, "num_tokens": 204178462.0, "reward": 0.0, "reward_std": 0.9222646951675415, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.010295271773471336, "rewards/wordcountpos_reward/raw_geo/std": 0.07624270058174522, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1013.3125, "completions/mean_terminated_length": 1013.3125, "completions/min_length": 742.0, "completions/min_terminated_length": 742.0, "epoch": 0.9327865573114623, "frac_reward_zero_std": 0.0, "grad_norm": 2.847176078091671, "kl": 0.0134429931640625, "learning_rate": 1.124023790773168e-07, "loss": -0.0123, "num_tokens": 204219939.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8372703194618225, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.060355155588745026, "rewards/wordcountpos_reward/raw_geo/std": 0.15118898372434814, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1158.875, "completions/mean_terminated_length": 1158.875, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.9329865973194639, "frac_reward_zero_std": 0.0, "grad_norm": 3.2665940548703523, "kl": 0.01666259765625, "learning_rate": 1.1232922112634608e-07, "loss": -0.0464, "num_tokens": 204270961.0, "reward": 0.0, "reward_std": 0.6147686243057251, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14842566333550794, "rewards/wordcountpos_reward/raw_geo/std": 0.3185629319681234, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.18968785275151775, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 1159.5625, "completions/mean_terminated_length": 1159.5625, "completions/min_length": 837.0, "completions/min_terminated_length": 837.0, "epoch": 0.9331866373274655, "frac_reward_zero_std": 0.0, "grad_norm": 2.8131596114094113, "kl": 0.0160369873046875, "learning_rate": 1.122562765856345e-07, "loss": -0.0356, "num_tokens": 204313050.0, "reward": 0.0, "reward_std": 0.6066786050796509, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08468267342963362, "rewards/wordcountpos_reward/raw_geo/std": 0.1562544274239941, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 1188.625, "completions/mean_terminated_length": 1188.625, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.9333866773354671, "frac_reward_zero_std": 0.0, "grad_norm": 3.206195688026458, "kl": 0.0184326171875, "learning_rate": 1.1218354549075025e-07, "loss": -0.0365, "num_tokens": 204361132.0, "reward": 0.0, "reward_std": 0.9040176272392273, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.012574855076708772, "rewards/wordcountpos_reward/raw_geo/std": 0.11087515876426189, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1202.375, "completions/mean_terminated_length": 1202.375, "completions/min_length": 1055.0, "completions/min_terminated_length": 1055.0, "epoch": 0.9335867173434687, "frac_reward_zero_std": 0.0, "grad_norm": 2.905820108364047, "kl": 0.0111083984375, "learning_rate": 1.1211102787715729e-07, "loss": -0.0044, "num_tokens": 204393514.0, "reward": 0.0, "reward_std": 1.0084445476531982, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07531439705604057, "rewards/wordcountpos_reward/raw_geo/std": 0.04012283611724082, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1208.875, "completions/mean_terminated_length": 1208.875, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.9337867573514703, "frac_reward_zero_std": 0.0, "grad_norm": 2.492297972809035, "kl": 0.010406494140625, "learning_rate": 1.1203872378021554e-07, "loss": -0.0006, "num_tokens": 204429264.0, "reward": 0.0, "reward_std": 0.7670354843139648, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1335414775215522, "rewards/wordcountpos_reward/raw_geo/std": 0.1261113164816312, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1313.25, "completions/mean_terminated_length": 1300.800048828125, "completions/min_length": 1045.0, "completions/min_terminated_length": 1045.0, "epoch": 0.9339867973594719, "frac_reward_zero_std": 0.0, "grad_norm": 2.735706340659568, "kl": 0.01983642578125, "learning_rate": 1.1196663323518085e-07, "loss": -0.0045, "num_tokens": 204481124.0, "reward": 0.0, "reward_std": 0.8851456642150879, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03410911646083338, "rewards/wordcountpos_reward/raw_geo/std": 0.12817011326456235, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382573, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1470.0, "completions/max_terminated_length": 1470.0, "completions/mean_length": 1185.8125, "completions/mean_terminated_length": 1185.8125, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.9341868373674735, "frac_reward_zero_std": 0.0, "grad_norm": 3.1454582691729587, "kl": 0.018341064453125, "learning_rate": 1.1189475627720488e-07, "loss": -0.0207, "num_tokens": 204532017.0, "reward": -2.9802322387695312e-08, "reward_std": 0.706138014793396, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12100292829955343, "rewards/wordcountpos_reward/raw_geo/std": 0.06776672172306854, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568497, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1490.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1252.1875, "completions/mean_terminated_length": 1252.1875, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.9343868773754751, "frac_reward_zero_std": 0.0, "grad_norm": 2.714694800055772, "kl": 0.0130615234375, "learning_rate": 1.118230929413351e-07, "loss": -0.0153, "num_tokens": 204577452.0, "reward": 0.0, "reward_std": 0.5581710934638977, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15004262726487333, "rewards/wordcountpos_reward/raw_geo/std": 0.19834045767953756, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1351.0625, "completions/mean_terminated_length": 1261.7000732421875, "completions/min_length": 1116.0, "completions/min_terminated_length": 1116.0, "epoch": 0.9345869173834767, "frac_reward_zero_std": 0.0, "grad_norm": 3.0917481519619057, "kl": 0.01898193359375, "learning_rate": 1.1175164326251491e-07, "loss": 0.0116, "num_tokens": 204634101.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9963586330413818, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09072021112066914, "rewards/wordcountpos_reward/raw_geo/std": 0.17510099613814184, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1383.0, "completions/max_terminated_length": 1383.0, "completions/mean_length": 1087.3125, "completions/mean_terminated_length": 1087.3125, "completions/min_length": 740.0, "completions/min_terminated_length": 740.0, "epoch": 0.9347869573914783, "frac_reward_zero_std": 0.0, "grad_norm": 3.7709093706225305, "kl": 0.023223876953125, "learning_rate": 1.1168040727558353e-07, "loss": -0.006, "num_tokens": 204683794.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0671697854995728, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.46836172548917904, "rewards/wordcountpos_reward/raw_geo/std": 0.5279998843020336, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0925962962222252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1189.75, "completions/mean_terminated_length": 1169.0667724609375, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.9349869973994799, "frac_reward_zero_std": 0.0, "grad_norm": 3.2207193705950363, "kl": 0.0181427001953125, "learning_rate": 1.1160938501527592e-07, "loss": -0.0292, "num_tokens": 204729990.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9303275346755981, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.28288051842953577, "rewards/wordcountpos_reward/raw_geo/std": 0.12375658912302342, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027818, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1326.375, "completions/mean_terminated_length": 1191.3333740234375, "completions/min_length": 1037.0, "completions/min_terminated_length": 1037.0, "epoch": 0.9351870374074815, "frac_reward_zero_std": 0.0, "grad_norm": 2.5393879515045326, "kl": 0.0117950439453125, "learning_rate": 1.1153857651622291e-07, "loss": -0.002, "num_tokens": 204779764.0, "reward": 0.0, "reward_std": 0.4838888645172119, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.00882265411494805, "rewards/wordcountpos_reward/raw_geo/std": 0.1222205767739483, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 1094.4375, "completions/mean_terminated_length": 1094.4375, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.9353870774154831, "frac_reward_zero_std": 0.0, "grad_norm": 1.6559186987657293, "kl": 0.0090789794921875, "learning_rate": 1.1146798181295092e-07, "loss": -0.0225, "num_tokens": 204811635.0, "reward": 2.9802322387695312e-08, "reward_std": 0.963566243648529, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09426586310796371, "rewards/wordcountpos_reward/raw_geo/std": 0.09488491001093724, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 1360.8125, "completions/mean_terminated_length": 1360.8125, "completions/min_length": 1014.0, "completions/min_terminated_length": 1014.0, "epoch": 0.9355871174234847, "frac_reward_zero_std": 0.0, "grad_norm": 2.3217014045066926, "kl": 0.00936126708984375, "learning_rate": 1.1139760093988243e-07, "loss": -0.023, "num_tokens": 204856224.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9221435785293579, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10783446893749322, "rewards/wordcountpos_reward/raw_geo/std": 0.03672879810897154, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1393.0, "completions/mean_length": 1241.1875, "completions/mean_terminated_length": 1181.4615478515625, "completions/min_length": 997.0, "completions/min_terminated_length": 997.0, "epoch": 0.9357871574314863, "frac_reward_zero_std": 0.0, "grad_norm": 2.4989940006974103, "kl": 0.0120697021484375, "learning_rate": 1.1132743393133533e-07, "loss": -0.0139, "num_tokens": 204898651.0, "reward": 0.0, "reward_std": 0.8483685255050659, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.14364911170773803, "rewards/wordcountpos_reward/raw_geo/std": 0.10530753283126199, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1130.1875, "completions/mean_terminated_length": 1105.533447265625, "completions/min_length": 983.0, "completions/min_terminated_length": 983.0, "epoch": 0.9359871974394879, "frac_reward_zero_std": 0.0, "grad_norm": 3.4386106163036505, "kl": 0.01702880859375, "learning_rate": 1.1125748082152335e-07, "loss": 0.0358, "num_tokens": 204942054.0, "reward": 7.450580596923828e-09, "reward_std": 1.016179084777832, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.20641735786325827, "rewards/wordcountpos_reward/raw_geo/std": 0.11203837107137513, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08766518798921943, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1227.75, "completions/mean_terminated_length": 1209.60009765625, "completions/min_length": 1031.0, "completions/min_terminated_length": 1031.0, "epoch": 0.9361872374474896, "frac_reward_zero_std": 0.0, "grad_norm": 3.077154914043381, "kl": 0.015777587890625, "learning_rate": 1.1118774164455606e-07, "loss": 0.025, "num_tokens": 204980866.0, "reward": 0.0, "reward_std": 0.9549058079719543, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13094998242623887, "rewards/wordcountpos_reward/raw_geo/std": 0.13957238655755766, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9958333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.016666666666666663, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1456.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1150.25, "completions/mean_terminated_length": 1150.25, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.9363872774554911, "frac_reward_zero_std": 0.0, "grad_norm": 3.536421897085163, "kl": 0.0174560546875, "learning_rate": 1.1111821643443861e-07, "loss": 0.0023, "num_tokens": 205019878.0, "reward": 0.0, "reward_std": 1.0455704927444458, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006502010782115923, "rewards/wordcountpos_reward/raw_geo/std": 0.03910898850695097, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 1066.3125, "completions/mean_terminated_length": 1066.3125, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.9365873174634927, "frac_reward_zero_std": 0.0, "grad_norm": 3.5903049133731115, "kl": 0.018890380859375, "learning_rate": 1.1104890522507168e-07, "loss": 0.0321, "num_tokens": 205061979.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6977790594100952, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.008233374309280313, "rewards/wordcountpos_reward/raw_geo/std": 0.049274275822537475, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1268.0, "completions/max_terminated_length": 1268.0, "completions/mean_length": 961.75, "completions/mean_terminated_length": 961.75, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.9367873574714943, "frac_reward_zero_std": 0.0, "grad_norm": 2.3935458496284183, "kl": 0.01158905029296875, "learning_rate": 1.109798080502518e-07, "loss": -0.115, "num_tokens": 205092919.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6324573755264282, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07867056077713608, "rewards/wordcountpos_reward/raw_geo/std": 0.1683857114934448, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 1194.5, "completions/mean_terminated_length": 1194.5, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.9369873974794959, "frac_reward_zero_std": 0.0, "grad_norm": 3.1959563076104374, "kl": 0.018768310546875, "learning_rate": 1.1091092494367104e-07, "loss": 0.0004, "num_tokens": 205139879.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8826788067817688, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03097868941415068, "rewards/wordcountpos_reward/raw_geo/std": 0.10340788629494647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026004, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1339.0, "completions/max_terminated_length": 1339.0, "completions/mean_length": 1140.875, "completions/mean_terminated_length": 1140.875, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.9371874374874976, "frac_reward_zero_std": 0.0, "grad_norm": 3.227657196662411, "kl": 0.0187225341796875, "learning_rate": 1.108422559389172e-07, "loss": 0.0471, "num_tokens": 205191453.0, "reward": 0.0, "reward_std": 0.9940351843833923, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3536460899292677, "rewards/wordcountpos_reward/raw_geo/std": 0.15630075004902685, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.10246950765959599, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1263.625, "completions/mean_terminated_length": 1156.181884765625, "completions/min_length": 807.0, "completions/min_terminated_length": 807.0, "epoch": 0.9373874774954991, "frac_reward_zero_std": 0.0, "grad_norm": 3.607783157296601, "kl": 0.0201416015625, "learning_rate": 1.1077380106947349e-07, "loss": 0.0051, "num_tokens": 205245791.0, "reward": 0.0, "reward_std": 0.7850897312164307, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06244173624333361, "rewards/wordcountpos_reward/raw_geo/std": 0.2282932906705503, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1256.8125, "completions/mean_terminated_length": 1175.75, "completions/min_length": 929.0, "completions/min_terminated_length": 929.0, "epoch": 0.9375875175035007, "frac_reward_zero_std": 0.0, "grad_norm": 3.049550668537532, "kl": 0.018646240234375, "learning_rate": 1.1070556036871886e-07, "loss": -0.0204, "num_tokens": 205290300.0, "reward": 0.0, "reward_std": 0.799578070640564, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.34319292309207977, "rewards/wordcountpos_reward/raw_geo/std": 0.13613830902322133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.13709958532503408, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 1221.8125, "completions/mean_terminated_length": 1182.071533203125, "completions/min_length": 1035.0, "completions/min_terminated_length": 1035.0, "epoch": 0.9377875575115023, "frac_reward_zero_std": 0.0, "grad_norm": 3.102623007374376, "kl": 0.019317626953125, "learning_rate": 1.106375338699278e-07, "loss": -0.0199, "num_tokens": 205334809.0, "reward": 0.0, "reward_std": 0.6601782441139221, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1157105852503537, "rewards/wordcountpos_reward/raw_geo/std": 0.07118292036703143, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11474609652039006, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1213.0, "completions/max_terminated_length": 1213.0, "completions/mean_length": 1085.9375, "completions/mean_terminated_length": 1085.9375, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.9379875975195039, "frac_reward_zero_std": 0.0, "grad_norm": 3.162276135334081, "kl": 0.013580322265625, "learning_rate": 1.1056972160627038e-07, "loss": -0.0215, "num_tokens": 205382016.0, "reward": 0.0, "reward_std": 0.6247299909591675, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0014044012068165253, "rewards/wordcountpos_reward/raw_geo/std": 0.13169288330183013, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1297.0, "completions/max_terminated_length": 1297.0, "completions/mean_length": 1115.875, "completions/mean_terminated_length": 1115.875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.9381876375275054, "frac_reward_zero_std": 0.0, "grad_norm": 3.1271448066602328, "kl": 0.018310546875, "learning_rate": 1.1050212361081204e-07, "loss": 0.0106, "num_tokens": 205435950.0, "reward": -4.470348358154297e-08, "reward_std": 0.9963032007217407, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0429159963555076, "rewards/wordcountpos_reward/raw_geo/std": 0.06586641585415251, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1152.0, "completions/max_terminated_length": 1152.0, "completions/mean_length": 967.5625, "completions/mean_terminated_length": 967.5625, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.9383876775355071, "frac_reward_zero_std": 0.0, "grad_norm": 3.435267449278294, "kl": 0.026153564453125, "learning_rate": 1.1043473991651402e-07, "loss": -0.0161, "num_tokens": 205476903.0, "reward": -3.725290298461914e-08, "reward_std": 0.9964874982833862, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10465279215451324, "rewards/wordcountpos_reward/raw_geo/std": 0.06739285582082429, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101761, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1310.0625, "completions/mean_terminated_length": 1223.727294921875, "completions/min_length": 1086.0, "completions/min_terminated_length": 1086.0, "epoch": 0.9385877175435087, "frac_reward_zero_std": 0.0, "grad_norm": 2.931195747646922, "kl": 0.0151519775390625, "learning_rate": 1.1036757055623277e-07, "loss": -0.0015, "num_tokens": 205530824.0, "reward": 0.0, "reward_std": 0.6840204000473022, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08313096120364644, "rewards/wordcountpos_reward/raw_geo/std": 0.10583421061675652, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1300.3125, "completions/mean_terminated_length": 1287.0001220703125, "completions/min_length": 1184.0, "completions/min_terminated_length": 1184.0, "epoch": 0.9387877575515103, "frac_reward_zero_std": 0.0, "grad_norm": 3.062321616615696, "kl": 0.0162200927734375, "learning_rate": 1.1030061556272048e-07, "loss": 0.0178, "num_tokens": 205573205.0, "reward": 0.0, "reward_std": 0.6127489805221558, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0673595766465054, "rewards/wordcountpos_reward/raw_geo/std": 0.04279767144345386, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.13158576980363348, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 1293.9375, "completions/mean_terminated_length": 1087.875, "completions/min_length": 985.0, "completions/min_terminated_length": 985.0, "epoch": 0.9389877975595119, "frac_reward_zero_std": 0.0, "grad_norm": 2.9090323091800507, "kl": 0.0144805908203125, "learning_rate": 1.1023387496862465e-07, "loss": -0.0305, "num_tokens": 205622852.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7699249982833862, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00889821214630498, "rewards/wordcountpos_reward/raw_geo/std": 0.06356806347719832, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1448.0, "completions/max_terminated_length": 1448.0, "completions/mean_length": 1201.0, "completions/mean_terminated_length": 1201.0, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.9391878375675135, "frac_reward_zero_std": 0.0, "grad_norm": 3.4027433903999214, "kl": 0.0235595703125, "learning_rate": 1.1016734880648835e-07, "loss": -0.0113, "num_tokens": 205676396.0, "reward": 0.0, "reward_std": 1.000368595123291, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1662985861930147, "rewards/wordcountpos_reward/raw_geo/std": 0.11457405914012876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1067187372905475, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1272.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1030.5, "completions/mean_terminated_length": 1030.5, "completions/min_length": 899.0, "completions/min_terminated_length": 899.0, "epoch": 0.9393878775755151, "frac_reward_zero_std": 0.0, "grad_norm": 3.2353673082954444, "kl": 0.017547607421875, "learning_rate": 1.1010103710875001e-07, "loss": 0.024, "num_tokens": 205710748.0, "reward": 0.0, "reward_std": 1.0521843433380127, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0281380815262721, "rewards/wordcountpos_reward/raw_geo/std": 0.18099815095069335, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1203.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 888.0625, "completions/mean_terminated_length": 888.0625, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.9395879175835167, "frac_reward_zero_std": 0.0, "grad_norm": 3.23189346623377, "kl": 0.0142669677734375, "learning_rate": 1.1003493990774347e-07, "loss": 0.0003, "num_tokens": 205745293.0, "reward": -3.3527612686157227e-08, "reward_std": 1.0187721252441406, "rewards/wordcountpos_reward/mean": -3.3527612686157227e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.052741695112763054, "rewards/wordcountpos_reward/raw_geo/std": 0.14495781718599166, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460883, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1308.5625, "completions/mean_terminated_length": 1244.75, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.9397879575915183, "frac_reward_zero_std": 0.0, "grad_norm": 3.1507713708958947, "kl": 0.01788330078125, "learning_rate": 1.0996905723569809e-07, "loss": 0.0236, "num_tokens": 205786166.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8681209087371826, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.057455638814998815, "rewards/wordcountpos_reward/raw_geo/std": 0.06596692362821698, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 974.125, "completions/mean_terminated_length": 974.125, "completions/min_length": 897.0, "completions/min_terminated_length": 897.0, "epoch": 0.9399879975995199, "frac_reward_zero_std": 0.0, "grad_norm": 2.235666660371771, "kl": 0.0115203857421875, "learning_rate": 1.0990338912473861e-07, "loss": -0.0234, "num_tokens": 205817016.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0448780059814453, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03406433014232842, "rewards/wordcountpos_reward/raw_geo/std": 0.07309618440304569, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1214.75, "completions/mean_terminated_length": 1195.7333984375, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.9401880376075215, "frac_reward_zero_std": 0.0, "grad_norm": 3.308866069272402, "kl": 0.01812744140625, "learning_rate": 1.0983793560688497e-07, "loss": -0.0319, "num_tokens": 205865620.0, "reward": -7.450580596923828e-09, "reward_std": 1.0577950477600098, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.0798330935067761, "rewards/wordcountpos_reward/raw_geo/std": 0.12449645939688883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.09574271077563382, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1116.25, "completions/mean_terminated_length": 1090.666748046875, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.9403880776155231, "frac_reward_zero_std": 0.0, "grad_norm": 2.8678316591104793, "kl": 0.016204833984375, "learning_rate": 1.0977269671405267e-07, "loss": -0.0267, "num_tokens": 205916040.0, "reward": 0.0, "reward_std": 0.3900395631790161, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09350354242182106, "rewards/wordcountpos_reward/raw_geo/std": 0.05772231947317734, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1243.5, "completions/mean_terminated_length": 1243.5, "completions/min_length": 1018.0, "completions/min_terminated_length": 1018.0, "epoch": 0.9405881176235247, "frac_reward_zero_std": 0.0, "grad_norm": 2.7132375431451314, "kl": 0.0153045654296875, "learning_rate": 1.0970767247805243e-07, "loss": 0.0256, "num_tokens": 205961096.0, "reward": 0.0, "reward_std": 0.8125902414321899, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10683051634390073, "rewards/wordcountpos_reward/raw_geo/std": 0.201403637918061, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1264.5, "completions/mean_terminated_length": 1264.5, "completions/min_length": 1016.0, "completions/min_terminated_length": 1016.0, "epoch": 0.9407881576315263, "frac_reward_zero_std": 0.0, "grad_norm": 2.9806800772049127, "kl": 0.01849365234375, "learning_rate": 1.096428629305905e-07, "loss": -0.0303, "num_tokens": 206014096.0, "reward": 0.0, "reward_std": 0.7722357511520386, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14834822886383614, "rewards/wordcountpos_reward/raw_geo/std": 0.05527432574165461, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.11674600476945511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 1028.75, "completions/mean_terminated_length": 961.4285888671875, "completions/min_length": 737.0, "completions/min_terminated_length": 737.0, "epoch": 0.9409881976395279, "frac_reward_zero_std": 0.0, "grad_norm": 3.554078075043909, "kl": 0.015960693359375, "learning_rate": 1.0957826810326819e-07, "loss": -0.11, "num_tokens": 206058252.0, "reward": 0.0, "reward_std": 0.7767568826675415, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08667131052282284, "rewards/wordcountpos_reward/raw_geo/std": 0.12907745461255946, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.1465024333004847, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1312.0625, "completions/mean_terminated_length": 1268.6923828125, "completions/min_length": 1119.0, "completions/min_terminated_length": 1119.0, "epoch": 0.9411882376475295, "frac_reward_zero_std": 0.0, "grad_norm": 2.7619878666236364, "kl": 0.0164642333984375, "learning_rate": 1.0951388802758224e-07, "loss": -0.019, "num_tokens": 206104205.0, "reward": 0.0, "reward_std": 1.0167866945266724, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.13687628671471788, "rewards/wordcountpos_reward/raw_geo/std": 0.04131064457981925, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1183.8125, "completions/mean_terminated_length": 1183.8125, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.9413882776555311, "frac_reward_zero_std": 0.0, "grad_norm": 3.291336634216073, "kl": 0.01812744140625, "learning_rate": 1.0944972273492471e-07, "loss": -0.0656, "num_tokens": 206145074.0, "reward": 0.0, "reward_std": 0.7681623697280884, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.425568751716318, "rewards/wordcountpos_reward/raw_geo/std": 0.43347201901385346, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.0709720863229836, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 1147.3125, "completions/mean_terminated_length": 1147.3125, "completions/min_length": 997.0, "completions/min_terminated_length": 997.0, "epoch": 0.9415883176635327, "frac_reward_zero_std": 0.0, "grad_norm": 2.8196480850711563, "kl": 0.014373779296875, "learning_rate": 1.0938577225658291e-07, "loss": 0.016, "num_tokens": 206186879.0, "reward": 1.4901161193847656e-08, "reward_std": 0.943395733833313, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0912620989730314, "rewards/wordcountpos_reward/raw_geo/std": 0.07791896169165188, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1162.125, "completions/mean_terminated_length": 1162.125, "completions/min_length": 1035.0, "completions/min_terminated_length": 1035.0, "epoch": 0.9417883576715343, "frac_reward_zero_std": 0.0, "grad_norm": 3.1830461012758855, "kl": 0.01519775390625, "learning_rate": 1.0932203662373926e-07, "loss": -0.0328, "num_tokens": 206225257.0, "reward": 0.0, "reward_std": 0.786467969417572, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1873289554377059, "rewards/wordcountpos_reward/raw_geo/std": 0.17367773555429952, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1217.0625, "completions/mean_terminated_length": 1198.2000732421875, "completions/min_length": 1028.0, "completions/min_terminated_length": 1028.0, "epoch": 0.9419883976795359, "frac_reward_zero_std": 0.0, "grad_norm": 3.0366215425697245, "kl": 0.019805908203125, "learning_rate": 1.0925851586747175e-07, "loss": -0.0305, "num_tokens": 206269026.0, "reward": -5.960464477539063e-08, "reward_std": 0.5212951898574829, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.34600648161254655, "rewards/wordcountpos_reward/raw_geo/std": 0.46133198219917726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1299.5625, "completions/mean_terminated_length": 1232.75, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.9421884376875375, "frac_reward_zero_std": 0.0, "grad_norm": 2.5837864614198094, "kl": 0.017425537109375, "learning_rate": 1.0919521001875318e-07, "loss": -0.0237, "num_tokens": 206320867.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8811289072036743, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05301500650663669, "rewards/wordcountpos_reward/raw_geo/std": 0.06961124078347772, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.19007795671678931, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1163.125, "completions/mean_terminated_length": 1115.0, "completions/min_length": 900.0, "completions/min_terminated_length": 900.0, "epoch": 0.9423884776955391, "frac_reward_zero_std": 0.0, "grad_norm": 3.0340060082306457, "kl": 0.0164031982421875, "learning_rate": 1.0913211910845188e-07, "loss": 0.001, "num_tokens": 206361853.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7542604207992554, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08420826334600572, "rewards/wordcountpos_reward/raw_geo/std": 0.06740655650252121, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 964.9375, "completions/mean_terminated_length": 964.9375, "completions/min_length": 660.0, "completions/min_terminated_length": 660.0, "epoch": 0.9425885177035407, "frac_reward_zero_std": 0.0, "grad_norm": 3.7029742046545198, "kl": 0.01751708984375, "learning_rate": 1.0906924316733118e-07, "loss": 0.0253, "num_tokens": 206396812.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0309643745422363, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11832137248034959, "rewards/wordcountpos_reward/raw_geo/std": 0.10793326254552953, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07969850595746357, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1348.375, "completions/mean_terminated_length": 1297.8333740234375, "completions/min_length": 1101.0, "completions/min_terminated_length": 1101.0, "epoch": 0.9427885577115424, "frac_reward_zero_std": 0.0, "grad_norm": 2.4645694799670372, "kl": 0.0139923095703125, "learning_rate": 1.0900658222604982e-07, "loss": -0.0132, "num_tokens": 206450370.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9487062096595764, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08116372376680527, "rewards/wordcountpos_reward/raw_geo/std": 0.07418549490750763, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1455.0, "completions/max_terminated_length": 1455.0, "completions/mean_length": 1127.1875, "completions/mean_terminated_length": 1127.1875, "completions/min_length": 713.0, "completions/min_terminated_length": 713.0, "epoch": 0.9429885977195439, "frac_reward_zero_std": 0.0, "grad_norm": 3.29682404232205, "kl": 0.018035888671875, "learning_rate": 1.0894413631516144e-07, "loss": -0.0229, "num_tokens": 206488533.0, "reward": 0.0, "reward_std": 0.8401923775672913, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.18620683923430772, "rewards/wordcountpos_reward/raw_geo/std": 0.18609392555879203, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.12758439472669758, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1115.6875, "completions/mean_terminated_length": 1090.0667724609375, "completions/min_length": 714.0, "completions/min_terminated_length": 714.0, "epoch": 0.9431886377275455, "frac_reward_zero_std": 0.0, "grad_norm": 3.726212523663001, "kl": 0.021270751953125, "learning_rate": 1.0888190546511501e-07, "loss": 0.0361, "num_tokens": 206529144.0, "reward": -7.450580596923828e-09, "reward_std": 0.9623713493347168, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.026612327417626314, "rewards/wordcountpos_reward/raw_geo/std": 0.09975371084442838, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.11894598836509009, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1244.5, "completions/mean_terminated_length": 1208.0, "completions/min_length": 909.0, "completions/min_terminated_length": 909.0, "epoch": 0.9433886777355471, "frac_reward_zero_std": 0.0, "grad_norm": 3.0068729712084705, "kl": 0.018951416015625, "learning_rate": 1.0881988970625453e-07, "loss": 0.0118, "num_tokens": 206578296.0, "reward": 0.0, "reward_std": 0.8304640054702759, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.24720262378465502, "rewards/wordcountpos_reward/raw_geo/std": 0.21342448620525936, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1157.5, "completions/mean_terminated_length": 1157.5, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.9435887177435487, "frac_reward_zero_std": 0.0, "grad_norm": 3.4439079873076395, "kl": 0.0205078125, "learning_rate": 1.0875808906881924e-07, "loss": 0.0293, "num_tokens": 206622400.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8645848035812378, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04601238597210609, "rewards/wordcountpos_reward/raw_geo/std": 0.11152366051931874, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 1.0, "rewards/wordcountpos_reward/raw_rule/std": 0.0, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1247.1875, "completions/mean_terminated_length": 1230.3333740234375, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.9437887577515504, "frac_reward_zero_std": 0.0, "grad_norm": 3.3289034118491063, "kl": 0.0162200927734375, "learning_rate": 1.0869650358294326e-07, "loss": -0.0506, "num_tokens": 206676499.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0299487113952637, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03796886188272028, "rewards/wordcountpos_reward/raw_geo/std": 0.06057097683834237, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.13763881881375054, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.6875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1464.3125, "completions/mean_terminated_length": 1385.800048828125, "completions/min_length": 1106.0, "completions/min_terminated_length": 1106.0, "epoch": 0.9439887977595519, "frac_reward_zero_std": 0.0, "grad_norm": 2.371920588673585, "kl": 0.010101318359375, "learning_rate": 1.0863513327865613e-07, "loss": -0.0083, "num_tokens": 206734080.0, "reward": 0.0, "reward_std": 0.8531865477561951, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.046376504384027156, "rewards/wordcountpos_reward/raw_geo/std": 0.07878193664775529, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.75, "rewards/wordcountpos_reward/raw_rule/std": 0.13221755360572016, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1295.125, "completions/mean_terminated_length": 1265.857177734375, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.9441888377675535, "frac_reward_zero_std": 0.0, "grad_norm": 3.0254889875694433, "kl": 0.0149383544921875, "learning_rate": 1.0857397818588215e-07, "loss": 0.002, "num_tokens": 206781194.0, "reward": 0.0, "reward_std": 0.6677308082580566, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02966463621740676, "rewards/wordcountpos_reward/raw_geo/std": 0.23959714926583728, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11642832797715322, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.75, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1471.4375, "completions/mean_terminated_length": 1385.75, "completions/min_length": 1214.0, "completions/min_terminated_length": 1214.0, "epoch": 0.9443888777755551, "frac_reward_zero_std": 0.0, "grad_norm": 2.860357507910352, "kl": 0.015350341796875, "learning_rate": 1.0851303833444093e-07, "loss": 0.0139, "num_tokens": 206838641.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7937328815460205, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.048980651573498166, "rewards/wordcountpos_reward/raw_geo/std": 0.07682823419236805, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1010.8125, "completions/mean_terminated_length": 1010.8125, "completions/min_length": 802.0, "completions/min_terminated_length": 802.0, "epoch": 0.9445889177835567, "frac_reward_zero_std": 0.0, "grad_norm": 3.0480731616729133, "kl": 0.0134124755859375, "learning_rate": 1.0845231375404695e-07, "loss": -0.0479, "num_tokens": 206873334.0, "reward": -4.470348358154297e-08, "reward_std": 1.0126796960830688, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.017413190830212377, "rewards/wordcountpos_reward/raw_geo/std": 0.040988465048213914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1060.0, "completions/mean_length": 1212.9375, "completions/mean_terminated_length": 925.875, "completions/min_length": 744.0, "completions/min_terminated_length": 744.0, "epoch": 0.9447889577915584, "frac_reward_zero_std": 0.0, "grad_norm": 3.346865576262732, "kl": 0.0197906494140625, "learning_rate": 1.0839180447430978e-07, "loss": -0.0518, "num_tokens": 206925989.0, "reward": 5.960464477539063e-08, "reward_std": 0.922878623008728, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.017596821733710317, "rewards/wordcountpos_reward/raw_geo/std": 0.02628336786646561, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457552, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1044.6875, "completions/mean_terminated_length": 1014.3333740234375, "completions/min_length": 538.0, "completions/min_terminated_length": 538.0, "epoch": 0.9449889977995599, "frac_reward_zero_std": 0.0, "grad_norm": 3.417763031105403, "kl": 0.019134521484375, "learning_rate": 1.0833151052473407e-07, "loss": -0.0296, "num_tokens": 206964616.0, "reward": 0.0, "reward_std": 0.8064069747924805, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.014656567104944007, "rewards/wordcountpos_reward/raw_geo/std": 0.11452574110150275, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12405196043952266, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 1245.0625, "completions/mean_terminated_length": 1129.181884765625, "completions/min_length": 862.0, "completions/min_terminated_length": 862.0, "epoch": 0.9451890378075615, "frac_reward_zero_std": 0.0, "grad_norm": 2.7878626988939863, "kl": 0.0133056640625, "learning_rate": 1.0827143193471942e-07, "loss": 0.0303, "num_tokens": 207008569.0, "reward": -2.9802322387695312e-08, "reward_std": 0.792611300945282, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.30854237605435464, "rewards/wordcountpos_reward/raw_geo/std": 0.23358187418093188, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 1029.0, "completions/mean_terminated_length": 1029.0, "completions/min_length": 789.0, "completions/min_terminated_length": 789.0, "epoch": 0.9453890778155631, "frac_reward_zero_std": 0.0, "grad_norm": 3.556312797126337, "kl": 0.01629638671875, "learning_rate": 1.0821156873356039e-07, "loss": -0.0389, "num_tokens": 207048665.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9591029286384583, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12281680198552128, "rewards/wordcountpos_reward/raw_geo/std": 0.37975378904276924, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1275.5, "completions/mean_terminated_length": 1243.4285888671875, "completions/min_length": 953.0, "completions/min_terminated_length": 953.0, "epoch": 0.9455891178235647, "frac_reward_zero_std": 0.0, "grad_norm": 3.307913017609395, "kl": 0.0208740234375, "learning_rate": 1.0815192095044655e-07, "loss": 0.0298, "num_tokens": 207094737.0, "reward": 0.0, "reward_std": 1.008724331855774, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1148178528892381, "rewards/wordcountpos_reward/raw_geo/std": 0.12403149237228149, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1345.5625, "completions/mean_terminated_length": 1275.3636474609375, "completions/min_length": 692.0, "completions/min_terminated_length": 692.0, "epoch": 0.9457891578315664, "frac_reward_zero_std": 0.0, "grad_norm": 2.7441727768123574, "kl": 0.01385498046875, "learning_rate": 1.0809248861446249e-07, "loss": -0.0803, "num_tokens": 207148418.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5671953558921814, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07801120994173154, "rewards/wordcountpos_reward/raw_geo/std": 0.15905743099245484, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.07888106377466153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 1126.9375, "completions/mean_terminated_length": 1126.9375, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.9459891978395679, "frac_reward_zero_std": 0.0, "grad_norm": 3.2404809002165287, "kl": 0.01666259765625, "learning_rate": 1.080332717545876e-07, "loss": 0.0245, "num_tokens": 207194209.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9159021973609924, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.024526396777982153, "rewards/wordcountpos_reward/raw_geo/std": 0.05836163874936855, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590962, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1275.0, "completions/max_terminated_length": 1275.0, "completions/mean_length": 1078.0, "completions/mean_terminated_length": 1078.0, "completions/min_length": 874.0, "completions/min_terminated_length": 874.0, "epoch": 0.9461892378475695, "frac_reward_zero_std": 0.0, "grad_norm": 3.589607201198031, "kl": 0.018585205078125, "learning_rate": 1.0797427039969637e-07, "loss": 0.0093, "num_tokens": 207227641.0, "reward": 0.0, "reward_std": 0.550682544708252, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09757222178993019, "rewards/wordcountpos_reward/raw_geo/std": 0.11474710951041935, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090449, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1266.9375, "completions/mean_terminated_length": 1233.6429443359375, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.9463892778555711, "frac_reward_zero_std": 0.0, "grad_norm": 3.2823187820788178, "kl": 0.02032470703125, "learning_rate": 1.0791548457855803e-07, "loss": 0.0155, "num_tokens": 207270600.0, "reward": 1.4901161193847656e-08, "reward_std": 0.987983226776123, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.021522020006223122, "rewards/wordcountpos_reward/raw_geo/std": 0.05185955402241297, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0697880388775209, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1412.0, "completions/max_terminated_length": 1412.0, "completions/mean_length": 1196.6875, "completions/mean_terminated_length": 1196.6875, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.9465893178635727, "frac_reward_zero_std": 0.0, "grad_norm": 3.4336441043006283, "kl": 0.020904541015625, "learning_rate": 1.0785691431983696e-07, "loss": -0.0014, "num_tokens": 207314371.0, "reward": 0.0, "reward_std": 0.753727912902832, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1943819885785578, "rewards/wordcountpos_reward/raw_geo/std": 0.1333137784556267, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1282.3125, "completions/mean_terminated_length": 1251.21435546875, "completions/min_length": 968.0, "completions/min_terminated_length": 968.0, "epoch": 0.9467893578715744, "frac_reward_zero_std": 0.0, "grad_norm": 2.529130672089568, "kl": 0.0139923095703125, "learning_rate": 1.0779855965209214e-07, "loss": 0.0048, "num_tokens": 207364240.0, "reward": 0.0, "reward_std": 1.0291147232055664, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09860310853641348, "rewards/wordcountpos_reward/raw_geo/std": 0.14263765514125928, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1083.0, "completions/mean_terminated_length": 1083.0, "completions/min_length": 752.0, "completions/min_terminated_length": 752.0, "epoch": 0.9469893978795759, "frac_reward_zero_std": 0.0, "grad_norm": 3.273591364354192, "kl": 0.020843505859375, "learning_rate": 1.077404206037776e-07, "loss": -0.0379, "num_tokens": 207410216.0, "reward": 0.0, "reward_std": 0.8964054584503174, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.059569360255852175, "rewards/wordcountpos_reward/raw_geo/std": 0.12199842335137769, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1124.6875, "completions/mean_terminated_length": 1124.6875, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.9471894378875775, "frac_reward_zero_std": 0.0, "grad_norm": 2.4365235880986185, "kl": 0.01087188720703125, "learning_rate": 1.0768249720324223e-07, "loss": 0.026, "num_tokens": 207450827.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9895091652870178, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09987519549747459, "rewards/wordcountpos_reward/raw_geo/std": 0.10773159196850858, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1419.0, "completions/max_terminated_length": 1419.0, "completions/mean_length": 1010.6875, "completions/mean_terminated_length": 1010.6875, "completions/min_length": 655.0, "completions/min_terminated_length": 655.0, "epoch": 0.9473894778955791, "frac_reward_zero_std": 0.0, "grad_norm": 4.069361042717239, "kl": 0.01922607421875, "learning_rate": 1.0762478947872983e-07, "loss": -0.0804, "num_tokens": 207494710.0, "reward": -5.960464477539063e-08, "reward_std": 0.6879798173904419, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07780226410101361, "rewards/wordcountpos_reward/raw_geo/std": 0.2256929239530281, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1114.0, "completions/max_terminated_length": 1114.0, "completions/mean_length": 926.75, "completions/mean_terminated_length": 926.75, "completions/min_length": 804.0, "completions/min_terminated_length": 804.0, "epoch": 0.9475895179035807, "frac_reward_zero_std": 0.0, "grad_norm": 3.7731531769753692, "kl": 0.01934814453125, "learning_rate": 1.0756729745837879e-07, "loss": -0.0089, "num_tokens": 207542162.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8141787052154541, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.14762959095422984, "rewards/wordcountpos_reward/raw_geo/std": 0.14630682502044431, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 1229.9375, "completions/mean_terminated_length": 1229.9375, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.9477895579115824, "frac_reward_zero_std": 0.0, "grad_norm": 3.287240471243821, "kl": 0.0173797607421875, "learning_rate": 1.0751002117022262e-07, "loss": -0.0009, "num_tokens": 207593329.0, "reward": 0.0, "reward_std": 1.0143147706985474, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12433237484578269, "rewards/wordcountpos_reward/raw_geo/std": 0.07058937709044719, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.12852438880818895, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1426.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1109.75, "completions/mean_terminated_length": 1109.75, "completions/min_length": 696.0, "completions/min_terminated_length": 696.0, "epoch": 0.9479895979195839, "frac_reward_zero_std": 0.0, "grad_norm": 3.7138389846104203, "kl": 0.0143280029296875, "learning_rate": 1.0745296064218935e-07, "loss": -0.0328, "num_tokens": 207636741.0, "reward": 0.0, "reward_std": 1.0315366983413696, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2509012766862678, "rewards/wordcountpos_reward/raw_geo/std": 0.22712012843521698, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.14168300559373406, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 1255.75, "completions/mean_terminated_length": 1239.4666748046875, "completions/min_length": 1060.0, "completions/min_terminated_length": 1060.0, "epoch": 0.9481896379275855, "frac_reward_zero_std": 0.0, "grad_norm": 2.564023145471269, "kl": 0.01165771484375, "learning_rate": 1.0739611590210212e-07, "loss": 0.0095, "num_tokens": 207672017.0, "reward": -5.587935447692871e-09, "reward_std": 0.9596502780914307, "rewards/wordcountpos_reward/mean": -5.587935447692871e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.09677796670400592, "rewards/wordcountpos_reward/raw_geo/std": 0.08662101536226016, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1120.125, "completions/mean_terminated_length": 1094.800048828125, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.9483896779355871, "frac_reward_zero_std": 0.0, "grad_norm": 2.746043051353714, "kl": 0.00948333740234375, "learning_rate": 1.073394869776785e-07, "loss": 0.0177, "num_tokens": 207715315.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8153335452079773, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06677478505455042, "rewards/wordcountpos_reward/raw_geo/std": 0.15217767033603471, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1108.375, "completions/mean_terminated_length": 1108.375, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.9485897179435887, "frac_reward_zero_std": 0.0, "grad_norm": 3.4469646773817586, "kl": 0.0194091796875, "learning_rate": 1.0728307389653119e-07, "loss": -0.0266, "num_tokens": 207762609.0, "reward": 0.0, "reward_std": 0.9126440286636353, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10684763874570319, "rewards/wordcountpos_reward/raw_geo/std": 0.1680620626975244, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.07097208632298362, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 1104.6875, "completions/mean_terminated_length": 1078.3333740234375, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.9487897579515904, "frac_reward_zero_std": 0.0, "grad_norm": 3.7209265887426954, "kl": 0.020233154296875, "learning_rate": 1.0722687668616733e-07, "loss": 0.0402, "num_tokens": 207813324.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6747462153434753, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.10234664497512307, "rewards/wordcountpos_reward/raw_geo/std": 0.03519728308352306, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.16843506277010845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1037.125, "completions/mean_terminated_length": 971.0000610351562, "completions/min_length": 495.0, "completions/min_terminated_length": 495.0, "epoch": 0.9489897979595919, "frac_reward_zero_std": 0.0, "grad_norm": 2.952254977175122, "kl": 0.0152435302734375, "learning_rate": 1.0717089537398902e-07, "loss": -0.1141, "num_tokens": 207852798.0, "reward": 0.0, "reward_std": 0.7303816080093384, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04841992363938596, "rewards/wordcountpos_reward/raw_geo/std": 0.1587466327792322, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 1070.8125, "completions/mean_terminated_length": 1070.8125, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.9491898379675935, "frac_reward_zero_std": 0.0, "grad_norm": 2.2822478697325495, "kl": 0.00885772705078125, "learning_rate": 1.0711512998729293e-07, "loss": 0.0329, "num_tokens": 207899019.0, "reward": 0.0, "reward_std": 0.868086576461792, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07877121025000554, "rewards/wordcountpos_reward/raw_geo/std": 0.07531183531520755, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195305, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1323.5625, "completions/mean_terminated_length": 1298.357177734375, "completions/min_length": 1041.0, "completions/min_terminated_length": 1041.0, "epoch": 0.9493898779755952, "frac_reward_zero_std": 0.0, "grad_norm": 2.8916286083595355, "kl": 0.020538330078125, "learning_rate": 1.0705958055327064e-07, "loss": -0.0245, "num_tokens": 207945916.0, "reward": 0.0, "reward_std": 0.9740926623344421, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04779280411704018, "rewards/wordcountpos_reward/raw_geo/std": 0.19684625159560992, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1144.75, "completions/mean_terminated_length": 1144.75, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.9495899179835967, "frac_reward_zero_std": 0.0, "grad_norm": 2.6898507768947644, "kl": 0.014495849609375, "learning_rate": 1.0700424709900813e-07, "loss": 0.0179, "num_tokens": 207991072.0, "reward": 0.0, "reward_std": 0.6322691440582275, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05815150445265959, "rewards/wordcountpos_reward/raw_geo/std": 0.49728973191369014, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 1247.3125, "completions/mean_terminated_length": 994.625, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.9497899579915983, "frac_reward_zero_std": 0.0, "grad_norm": 2.851396896796251, "kl": 0.016021728515625, "learning_rate": 1.0694912965148641e-07, "loss": -0.0416, "num_tokens": 208040693.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9463974237442017, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05549325889710355, "rewards/wordcountpos_reward/raw_geo/std": 0.16476693188414637, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.14580555290954889, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 1144.75, "completions/mean_terminated_length": 1144.75, "completions/min_length": 971.0, "completions/min_terminated_length": 971.0, "epoch": 0.9499899979995999, "frac_reward_zero_std": 0.0, "grad_norm": 3.7605558396285077, "kl": 0.021453857421875, "learning_rate": 1.0689422823758083e-07, "loss": -0.0048, "num_tokens": 208091097.0, "reward": 0.0, "reward_std": 0.5708290338516235, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07703809778052738, "rewards/wordcountpos_reward/raw_geo/std": 0.09737461583687085, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.6625, "rewards/wordcountpos_reward/raw_rule/std": 0.28851471494663966, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 1133.5625, "completions/mean_terminated_length": 1133.5625, "completions/min_length": 942.0, "completions/min_terminated_length": 942.0, "epoch": 0.9501900380076015, "frac_reward_zero_std": 0.0, "grad_norm": 3.3322593237137235, "kl": 0.017303466796875, "learning_rate": 1.068395428840618e-07, "loss": 0.0202, "num_tokens": 208125674.0, "reward": 7.450580596923828e-09, "reward_std": 1.0600394010543823, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.08400819000057691, "rewards/wordcountpos_reward/raw_geo/std": 0.05563934483864661, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1166.8125, "completions/mean_terminated_length": 1144.60009765625, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.9503900780156032, "frac_reward_zero_std": 0.0, "grad_norm": 3.3820858828522677, "kl": 0.017974853515625, "learning_rate": 1.067850736175939e-07, "loss": -0.0074, "num_tokens": 208161759.0, "reward": 0.0, "reward_std": 0.5749198794364929, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07236671721243017, "rewards/wordcountpos_reward/raw_geo/std": 0.04427002731516801, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1441.0, "completions/mean_length": 1211.5625, "completions/mean_terminated_length": 1170.357177734375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.9505901180236047, "frac_reward_zero_std": 0.0, "grad_norm": 2.9400721448198506, "kl": 0.0145416259765625, "learning_rate": 1.0673082046473672e-07, "loss": -0.0039, "num_tokens": 208195848.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8882012367248535, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15373013644687702, "rewards/wordcountpos_reward/raw_geo/std": 0.06431178152609267, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 1116.5, "completions/mean_terminated_length": 1116.5, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.9507901580316063, "frac_reward_zero_std": 0.0, "grad_norm": 3.4301110446678416, "kl": 0.0174560546875, "learning_rate": 1.0667678345194433e-07, "loss": 0.0086, "num_tokens": 208237336.0, "reward": 0.0, "reward_std": 0.8943558931350708, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.006470579134068413, "rewards/wordcountpos_reward/raw_geo/std": 0.06777092997927636, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590965, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1362.25, "completions/mean_terminated_length": 1279.5999755859375, "completions/min_length": 1151.0, "completions/min_terminated_length": 1151.0, "epoch": 0.9509901980396079, "frac_reward_zero_std": 0.0, "grad_norm": 2.1432204082511745, "kl": 0.011932373046875, "learning_rate": 1.0662296260556548e-07, "loss": 0.003, "num_tokens": 208283140.0, "reward": 0.0, "reward_std": 0.870155394077301, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05645313254742909, "rewards/wordcountpos_reward/raw_geo/std": 0.11268319638041463, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1046.875, "completions/mean_terminated_length": 1046.875, "completions/min_length": 719.0, "completions/min_terminated_length": 719.0, "epoch": 0.9511902380476095, "frac_reward_zero_std": 0.0, "grad_norm": 3.540907436858697, "kl": 0.017364501953125, "learning_rate": 1.0656935795184329e-07, "loss": 0.0346, "num_tokens": 208321178.0, "reward": 0.0, "reward_std": 0.5930696725845337, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23529774727400105, "rewards/wordcountpos_reward/raw_geo/std": 0.11969769332850269, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.048686449556014755, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1088.8125, "completions/mean_terminated_length": 1088.8125, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.9513902780556112, "frac_reward_zero_std": 0.0, "grad_norm": 3.73713326569395, "kl": 0.021820068359375, "learning_rate": 1.0651596951691581e-07, "loss": -0.0014, "num_tokens": 208362039.0, "reward": 2.2351741790771484e-08, "reward_std": 1.058232307434082, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.023342220356802036, "rewards/wordcountpos_reward/raw_geo/std": 0.03447654237620089, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1396.0, "completions/max_terminated_length": 1396.0, "completions/mean_length": 1198.125, "completions/mean_terminated_length": 1198.125, "completions/min_length": 1035.0, "completions/min_terminated_length": 1035.0, "epoch": 0.9515903180636127, "frac_reward_zero_std": 0.0, "grad_norm": 3.148422615457423, "kl": 0.015533447265625, "learning_rate": 1.0646279732681539e-07, "loss": 0.027, "num_tokens": 208408929.0, "reward": 0.0, "reward_std": 0.9208203554153442, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3230813231780859, "rewards/wordcountpos_reward/raw_geo/std": 0.21966207535352128, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10945995377982527, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1457.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1097.5, "completions/mean_terminated_length": 1097.5, "completions/min_length": 726.0, "completions/min_terminated_length": 726.0, "epoch": 0.9517903580716143, "frac_reward_zero_std": 0.0, "grad_norm": 3.6152137625246796, "kl": 0.021240234375, "learning_rate": 1.0640984140746907e-07, "loss": -0.0003, "num_tokens": 208447673.0, "reward": 0.0, "reward_std": 0.7861734628677368, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12129926740027007, "rewards/wordcountpos_reward/raw_geo/std": 0.07942883566214946, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316811, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1099.9375, "completions/mean_terminated_length": 1073.2667236328125, "completions/min_length": 843.0, "completions/min_terminated_length": 843.0, "epoch": 0.9519903980796159, "frac_reward_zero_std": 0.0, "grad_norm": 3.281212676747429, "kl": 0.0162811279296875, "learning_rate": 1.063571017846983e-07, "loss": -0.0389, "num_tokens": 208490488.0, "reward": 0.0, "reward_std": 0.798647403717041, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.033348403009466954, "rewards/wordcountpos_reward/raw_geo/std": 0.0603558061450506, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.04554200340426487, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1322.0, "completions/max_terminated_length": 1322.0, "completions/mean_length": 1077.6875, "completions/mean_terminated_length": 1077.6875, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.9521904380876175, "frac_reward_zero_std": 0.0, "grad_norm": 3.847836555786951, "kl": 0.02069091796875, "learning_rate": 1.0630457848421937e-07, "loss": 0.0248, "num_tokens": 208528883.0, "reward": 0.0, "reward_std": 0.6477377414703369, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.4679510487194855, "rewards/wordcountpos_reward/raw_geo/std": 0.36318993876003186, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 1282.0625, "completions/mean_terminated_length": 1250.9285888671875, "completions/min_length": 1036.0, "completions/min_terminated_length": 1036.0, "epoch": 0.9523904780956192, "frac_reward_zero_std": 0.0, "grad_norm": 2.7349710209797164, "kl": 0.015838623046875, "learning_rate": 1.0625227153164266e-07, "loss": -0.0689, "num_tokens": 208583524.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6156750917434692, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06836099299849702, "rewards/wordcountpos_reward/raw_geo/std": 0.2204955994349294, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07391185942027816, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1093.6875, "completions/mean_terminated_length": 1066.60009765625, "completions/min_length": 946.0, "completions/min_terminated_length": 946.0, "epoch": 0.9525905181036207, "frac_reward_zero_std": 0.0, "grad_norm": 3.0119476028079326, "kl": 0.01165771484375, "learning_rate": 1.0620018095247341e-07, "loss": 0.0188, "num_tokens": 208630719.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0299670696258545, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.01607811550972186, "rewards/wordcountpos_reward/raw_geo/std": 0.10695407311092867, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.141878925953186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 1230.125, "completions/mean_terminated_length": 1230.125, "completions/min_length": 1035.0, "completions/min_terminated_length": 1035.0, "epoch": 0.9527905581116223, "frac_reward_zero_std": 0.0, "grad_norm": 3.24135629500014, "kl": 0.0294189453125, "learning_rate": 1.0614830677211111e-07, "loss": 0.0009, "num_tokens": 208670569.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8363125324249268, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.020194436331816827, "rewards/wordcountpos_reward/raw_geo/std": 0.04229158657532829, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 1166.4375, "completions/mean_terminated_length": 1144.2000732421875, "completions/min_length": 779.0, "completions/min_terminated_length": 779.0, "epoch": 0.9529905981196239, "frac_reward_zero_std": 0.0, "grad_norm": 3.297111425774438, "kl": 0.0210418701171875, "learning_rate": 1.0609664901584998e-07, "loss": 0.0037, "num_tokens": 208719312.0, "reward": 0.0, "reward_std": 0.7610864639282227, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.027102977429535983, "rewards/wordcountpos_reward/raw_geo/std": 0.060665370268892356, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1057.1875, "completions/mean_terminated_length": 993.9285888671875, "completions/min_length": 624.0, "completions/min_terminated_length": 624.0, "epoch": 0.9531906381276255, "frac_reward_zero_std": 0.0, "grad_norm": 2.600321353126247, "kl": 0.0166015625, "learning_rate": 1.0604520770887854e-07, "loss": -0.0497, "num_tokens": 208764851.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9003808498382568, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15135198788333631, "rewards/wordcountpos_reward/raw_geo/std": 0.1663485753084715, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 1002.375, "completions/mean_terminated_length": 1002.375, "completions/min_length": 708.0, "completions/min_terminated_length": 708.0, "epoch": 0.9533906781356272, "frac_reward_zero_std": 0.0, "grad_norm": 3.6982253024624576, "kl": 0.02496337890625, "learning_rate": 1.0599398287627971e-07, "loss": -0.0138, "num_tokens": 208795673.0, "reward": 0.0, "reward_std": 0.737007737159729, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09051773362578502, "rewards/wordcountpos_reward/raw_geo/std": 0.12646353368923952, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7875, "rewards/wordcountpos_reward/raw_rule/std": 0.10671873729054751, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 938.375, "completions/mean_terminated_length": 938.375, "completions/min_length": 718.0, "completions/min_terminated_length": 718.0, "epoch": 0.9535907181436287, "frac_reward_zero_std": 0.0, "grad_norm": 3.6937574241185085, "kl": 0.0167083740234375, "learning_rate": 1.0594297454303103e-07, "loss": 0.0149, "num_tokens": 208822663.0, "reward": 0.0, "reward_std": 0.8899445533752441, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10884538201062752, "rewards/wordcountpos_reward/raw_geo/std": 0.07198474965066244, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1170.4375, "completions/mean_terminated_length": 1170.4375, "completions/min_length": 991.0, "completions/min_terminated_length": 991.0, "epoch": 0.9537907581516303, "frac_reward_zero_std": 0.0, "grad_norm": 3.1545597418470726, "kl": 0.0185546875, "learning_rate": 1.0589218273400441e-07, "loss": 0.0056, "num_tokens": 208859502.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8566383719444275, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19307178069769607, "rewards/wordcountpos_reward/raw_geo/std": 0.20779966260006968, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1433.0, "completions/mean_length": 1224.4375, "completions/mean_terminated_length": 1185.071533203125, "completions/min_length": 996.0, "completions/min_terminated_length": 996.0, "epoch": 0.953990798159632, "frac_reward_zero_std": 0.0, "grad_norm": 2.0672680222107394, "kl": 0.0098876953125, "learning_rate": 1.0584160747396613e-07, "loss": -0.0103, "num_tokens": 208899813.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8635209798812866, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06809191287472208, "rewards/wordcountpos_reward/raw_geo/std": 0.07057473211247162, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1379.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 951.25, "completions/mean_terminated_length": 951.25, "completions/min_length": 758.0, "completions/min_terminated_length": 758.0, "epoch": 0.9541908381676335, "frac_reward_zero_std": 0.0, "grad_norm": 3.193427504941948, "kl": 0.01445770263671875, "learning_rate": 1.0579124878757691e-07, "loss": -0.0212, "num_tokens": 208930689.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9292389750480652, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14459075722453896, "rewards/wordcountpos_reward/raw_geo/std": 0.1925979549562579, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1274.4375, "completions/mean_terminated_length": 1222.3846435546875, "completions/min_length": 1067.0, "completions/min_terminated_length": 1067.0, "epoch": 0.9543908781756352, "frac_reward_zero_std": 0.0, "grad_norm": 3.4341834307717494, "kl": 0.01995849609375, "learning_rate": 1.0574110669939191e-07, "loss": 0.0123, "num_tokens": 208977920.0, "reward": 0.0, "reward_std": 0.948871910572052, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.035622852766604395, "rewards/wordcountpos_reward/raw_geo/std": 0.12031706815541532, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818418, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1187.5, "completions/mean_terminated_length": 1187.5, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.9545909181836367, "frac_reward_zero_std": 0.0, "grad_norm": 3.4002616885979653, "kl": 0.018585205078125, "learning_rate": 1.0569118123386063e-07, "loss": -0.0204, "num_tokens": 209028400.0, "reward": 0.0, "reward_std": 0.7962775230407715, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09525125372672992, "rewards/wordcountpos_reward/raw_geo/std": 0.14259013089069217, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1369.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1137.125, "completions/mean_terminated_length": 1137.125, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.9547909581916383, "frac_reward_zero_std": 0.0, "grad_norm": 3.0264452180307098, "kl": 0.0145721435546875, "learning_rate": 1.0564147241532695e-07, "loss": 0.019, "num_tokens": 209074434.0, "reward": 0.0, "reward_std": 0.8791409730911255, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.026353780494870915, "rewards/wordcountpos_reward/raw_geo/std": 0.12823988630469374, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477444, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1188.0, "completions/max_terminated_length": 1188.0, "completions/mean_length": 939.3125, "completions/mean_terminated_length": 939.3125, "completions/min_length": 618.0, "completions/min_terminated_length": 618.0, "epoch": 0.95499099819964, "frac_reward_zero_std": 0.0, "grad_norm": 3.8528050481214113, "kl": 0.0167083740234375, "learning_rate": 1.0559198026802905e-07, "loss": 0.073, "num_tokens": 209106575.0, "reward": 0.0, "reward_std": 0.5094962120056152, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1773260013012169, "rewards/wordcountpos_reward/raw_geo/std": 0.19401762198601002, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1224.0, "completions/max_terminated_length": 1224.0, "completions/mean_length": 1064.125, "completions/mean_terminated_length": 1064.125, "completions/min_length": 641.0, "completions/min_terminated_length": 641.0, "epoch": 0.9551910382076415, "frac_reward_zero_std": 0.0, "grad_norm": 2.80351804210876, "kl": 0.013427734375, "learning_rate": 1.0554270481609961e-07, "loss": -0.0173, "num_tokens": 209151065.0, "reward": -1.4901161193847656e-08, "reward_std": 1.018952488899231, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016027240456593178, "rewards/wordcountpos_reward/raw_geo/std": 0.08634157714034647, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059628479399994376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1323.0, "completions/max_terminated_length": 1323.0, "completions/mean_length": 1066.3125, "completions/mean_terminated_length": 1066.3125, "completions/min_length": 916.0, "completions/min_terminated_length": 916.0, "epoch": 0.9553910782156432, "frac_reward_zero_std": 0.0, "grad_norm": 3.0849022402858735, "kl": 0.019317626953125, "learning_rate": 1.0549364608356561e-07, "loss": 0.015, "num_tokens": 209189374.0, "reward": 0.0, "reward_std": 0.8767688274383545, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.09847175304993494, "rewards/wordcountpos_reward/raw_geo/std": 0.2157675939576443, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1096.0, "completions/mean_terminated_length": 1069.0667724609375, "completions/min_length": 855.0, "completions/min_terminated_length": 855.0, "epoch": 0.9555911182236447, "frac_reward_zero_std": 0.0, "grad_norm": 3.579319263848328, "kl": 0.018707275390625, "learning_rate": 1.054448040943482e-07, "loss": -0.0112, "num_tokens": 209239382.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5345796942710876, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06414641192903923, "rewards/wordcountpos_reward/raw_geo/std": 0.06417131763463622, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1146.0, "completions/max_terminated_length": 1146.0, "completions/mean_length": 970.0, "completions/mean_terminated_length": 970.0, "completions/min_length": 717.0, "completions/min_terminated_length": 717.0, "epoch": 0.9557911582316463, "frac_reward_zero_std": 0.0, "grad_norm": 3.1663800945115685, "kl": 0.0151519775390625, "learning_rate": 1.0539617887226293e-07, "loss": -0.0093, "num_tokens": 209275374.0, "reward": 0.0, "reward_std": 0.9754102230072021, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.02585422751021243, "rewards/wordcountpos_reward/raw_geo/std": 0.07422599491563968, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1255.125, "completions/mean_terminated_length": 1173.5, "completions/min_length": 994.0, "completions/min_terminated_length": 994.0, "epoch": 0.955991198239648, "frac_reward_zero_std": 0.0, "grad_norm": 3.2578462954018996, "kl": 0.020233154296875, "learning_rate": 1.053477704410198e-07, "loss": -0.002, "num_tokens": 209321464.0, "reward": 0.0, "reward_std": 0.8973460793495178, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3604331180596339, "rewards/wordcountpos_reward/raw_geo/std": 0.46431910330194853, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 1073.0625, "completions/mean_terminated_length": 1073.0625, "completions/min_length": 927.0, "completions/min_terminated_length": 927.0, "epoch": 0.9561912382476495, "frac_reward_zero_std": 0.0, "grad_norm": 3.0123003105845307, "kl": 0.01125335693359375, "learning_rate": 1.0529957882422293e-07, "loss": -0.0276, "num_tokens": 209367729.0, "reward": 0.0, "reward_std": 0.8103455305099487, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08068156181972061, "rewards/wordcountpos_reward/raw_geo/std": 0.07028980068154729, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.16329931618554522, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 1034.875, "completions/mean_terminated_length": 1034.875, "completions/min_length": 902.0, "completions/min_terminated_length": 902.0, "epoch": 0.9563912782556512, "frac_reward_zero_std": 0.0, "grad_norm": 3.734047866037615, "kl": 0.020050048828125, "learning_rate": 1.0525160404537075e-07, "loss": -0.0097, "num_tokens": 209409351.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9816040992736816, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03291853323047933, "rewards/wordcountpos_reward/raw_geo/std": 0.12424845427229726, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.12141145226353543, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1469.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1349.25, "completions/mean_terminated_length": 1349.25, "completions/min_length": 1105.0, "completions/min_terminated_length": 1105.0, "epoch": 0.9565913182636527, "frac_reward_zero_std": 0.0, "grad_norm": 2.1403140428600675, "kl": 0.0112152099609375, "learning_rate": 1.0520384612785604e-07, "loss": -0.0127, "num_tokens": 209448139.0, "reward": -7.450580596923828e-09, "reward_std": 1.0500397682189941, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.022916704563257388, "rewards/wordcountpos_reward/raw_geo/std": 0.17815424762180435, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 1135.25, "completions/mean_terminated_length": 1135.25, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.9567913582716543, "frac_reward_zero_std": 0.0, "grad_norm": 2.3729214879829468, "kl": 0.01209259033203125, "learning_rate": 1.0515630509496571e-07, "loss": 0.0084, "num_tokens": 209490759.0, "reward": -7.450580596923828e-09, "reward_std": 1.0544626712799072, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.058545697106813926, "rewards/wordcountpos_reward/raw_geo/std": 0.12464920406739673, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06191391873668902, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1269.3125, "completions/mean_terminated_length": 1164.45458984375, "completions/min_length": 977.0, "completions/min_terminated_length": 977.0, "epoch": 0.956991398279656, "frac_reward_zero_std": 0.0, "grad_norm": 3.2469745313110665, "kl": 0.027191162109375, "learning_rate": 1.0510898096988097e-07, "loss": 0.0132, "num_tokens": 209540964.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8454891443252563, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1496618971329071, "rewards/wordcountpos_reward/raw_geo/std": 0.12033624231748107, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1466.0, "completions/max_terminated_length": 1466.0, "completions/mean_length": 1078.25, "completions/mean_terminated_length": 1078.25, "completions/min_length": 451.0, "completions/min_terminated_length": 451.0, "epoch": 0.9571914382876575, "frac_reward_zero_std": 0.0, "grad_norm": 2.5578646167765084, "kl": 0.013946533203125, "learning_rate": 1.0506187377567739e-07, "loss": -0.0525, "num_tokens": 209576944.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5801653861999512, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.12041441511487878, "rewards/wordcountpos_reward/raw_geo/std": 0.16276957410144396, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 994.5, "completions/mean_terminated_length": 960.800048828125, "completions/min_length": 568.0, "completions/min_terminated_length": 568.0, "epoch": 0.9573914782956592, "frac_reward_zero_std": 0.0, "grad_norm": 3.4098991455216994, "kl": 0.0176239013671875, "learning_rate": 1.0501498353532451e-07, "loss": 0.0085, "num_tokens": 209611768.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6443818211555481, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.053891097814233166, "rewards/wordcountpos_reward/raw_geo/std": 0.0353121778781497, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1205.625, "completions/mean_terminated_length": 1186.0001220703125, "completions/min_length": 944.0, "completions/min_terminated_length": 944.0, "epoch": 0.9575915183036607, "frac_reward_zero_std": 0.0, "grad_norm": 2.9503223158901646, "kl": 0.0119781494140625, "learning_rate": 1.0496831027168638e-07, "loss": -0.04, "num_tokens": 209663418.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9341143369674683, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.024837168501766982, "rewards/wordcountpos_reward/raw_geo/std": 0.09942057476509658, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1425.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1177.625, "completions/mean_terminated_length": 1177.625, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.9577915583116623, "frac_reward_zero_std": 0.0, "grad_norm": 2.993430869366721, "kl": 0.0143890380859375, "learning_rate": 1.0492185400752086e-07, "loss": 0.0105, "num_tokens": 209702140.0, "reward": -1.4901161193847656e-08, "reward_std": 0.912726640701294, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07385274483026923, "rewards/wordcountpos_reward/raw_geo/std": 0.05145581723982869, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1260.0, "completions/max_terminated_length": 1260.0, "completions/mean_length": 1033.25, "completions/mean_terminated_length": 1033.25, "completions/min_length": 835.0, "completions/min_terminated_length": 835.0, "epoch": 0.957991598319664, "frac_reward_zero_std": 0.0, "grad_norm": 2.7204953503555576, "kl": 0.01544189453125, "learning_rate": 1.0487561476548054e-07, "loss": 0.0005, "num_tokens": 209747752.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8105976581573486, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.002028857886069582, "rewards/wordcountpos_reward/raw_geo/std": 0.11675734572323047, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891873, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1457.0, "completions/mean_length": 1187.625, "completions/mean_terminated_length": 1143.0, "completions/min_length": 960.0, "completions/min_terminated_length": 960.0, "epoch": 0.9581916383276655, "frac_reward_zero_std": 0.0, "grad_norm": 3.3056160007546778, "kl": 0.0177764892578125, "learning_rate": 1.0482959256811164e-07, "loss": 0.0036, "num_tokens": 209795490.0, "reward": 2.9802322387695312e-08, "reward_std": 0.65792316198349, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07615181903403011, "rewards/wordcountpos_reward/raw_geo/std": 0.08759155429030584, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05947299418254506, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 1069.0, "completions/mean_terminated_length": 1069.0, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.9583916783356672, "frac_reward_zero_std": 0.0, "grad_norm": 2.9513925890032846, "kl": 0.0154266357421875, "learning_rate": 1.0478378743785488e-07, "loss": 0.0305, "num_tokens": 209844746.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9295662641525269, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0014599551885080995, "rewards/wordcountpos_reward/raw_geo/std": 0.08639542977091033, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258098, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1234.0, "completions/mean_terminated_length": 1145.3333740234375, "completions/min_length": 922.0, "completions/min_terminated_length": 922.0, "epoch": 0.9585917183436687, "frac_reward_zero_std": 0.0, "grad_norm": 2.7610013985138604, "kl": 0.0125579833984375, "learning_rate": 1.047381993970451e-07, "loss": -0.0468, "num_tokens": 209882138.0, "reward": 0.0, "reward_std": 0.7876117825508118, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.013081778856320739, "rewards/wordcountpos_reward/raw_geo/std": 0.08739563391802788, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1021.8125, "completions/mean_terminated_length": 1021.8125, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.9587917583516703, "frac_reward_zero_std": 0.0, "grad_norm": 3.168556371973078, "kl": 0.0149383544921875, "learning_rate": 1.0469282846791113e-07, "loss": -0.029, "num_tokens": 209921431.0, "reward": 7.450580596923828e-09, "reward_std": 1.0180799961090088, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.04200769810336504, "rewards/wordcountpos_reward/raw_geo/std": 0.07733504656652168, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.067631901304592, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1168.4375, "completions/mean_terminated_length": 1168.4375, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.958991798359672, "frac_reward_zero_std": 0.0, "grad_norm": 3.5649991379884085, "kl": 0.02020263671875, "learning_rate": 1.0464767467257613e-07, "loss": -0.0078, "num_tokens": 209956046.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0103517770767212, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09917800049681075, "rewards/wordcountpos_reward/raw_geo/std": 0.13371502281469944, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1462.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1140.0, "completions/mean_terminated_length": 1140.0, "completions/min_length": 858.0, "completions/min_terminated_length": 858.0, "epoch": 0.9591918383676735, "frac_reward_zero_std": 0.0, "grad_norm": 3.377480329972979, "kl": 0.0253448486328125, "learning_rate": 1.0460273803305725e-07, "loss": 0.0336, "num_tokens": 209996438.0, "reward": -2.9802322387695312e-08, "reward_std": 0.852980375289917, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.132139195332164, "rewards/wordcountpos_reward/raw_geo/std": 0.06844517904050379, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1309.3125, "completions/mean_terminated_length": 1309.3125, "completions/min_length": 1212.0, "completions/min_terminated_length": 1212.0, "epoch": 0.9593918783756752, "frac_reward_zero_std": 0.0, "grad_norm": 2.0251904809604966, "kl": 0.0087127685546875, "learning_rate": 1.0455801857126581e-07, "loss": -0.0003, "num_tokens": 210048355.0, "reward": 5.960464477539063e-08, "reward_std": 0.8969354033470154, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06636343250808371, "rewards/wordcountpos_reward/raw_geo/std": 0.07665162531819611, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 899.4375, "completions/mean_terminated_length": 899.4375, "completions/min_length": 582.0, "completions/min_terminated_length": 582.0, "epoch": 0.9595919183836767, "frac_reward_zero_std": 0.0, "grad_norm": 3.336585625433838, "kl": 0.01629638671875, "learning_rate": 1.0451351630900724e-07, "loss": 0.0109, "num_tokens": 210077418.0, "reward": 0.0, "reward_std": 0.8547691106796265, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0010416140038854303, "rewards/wordcountpos_reward/raw_geo/std": 0.22710738527717475, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1473.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1094.1875, "completions/mean_terminated_length": 1094.1875, "completions/min_length": 805.0, "completions/min_terminated_length": 805.0, "epoch": 0.9597919583916783, "frac_reward_zero_std": 0.0, "grad_norm": 3.2836463793754094, "kl": 0.016998291015625, "learning_rate": 1.0446923126798101e-07, "loss": -0.0813, "num_tokens": 210111917.0, "reward": 3.725290298461914e-09, "reward_std": 1.0338724851608276, "rewards/wordcountpos_reward/mean": 3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.048516677293306276, "rewards/wordcountpos_reward/raw_geo/std": 0.1387956278372233, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1253144193766372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1290.0, "completions/mean_length": 1094.0, "completions/mean_terminated_length": 1066.933349609375, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.95999199839968, "frac_reward_zero_std": 0.0, "grad_norm": 3.184467384464812, "kl": 0.01708984375, "learning_rate": 1.0442516346978073e-07, "loss": -0.0316, "num_tokens": 210153317.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9468871355056763, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05242376764484644, "rewards/wordcountpos_reward/raw_geo/std": 0.09480700630974297, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1080980350662545, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1084.75, "completions/mean_terminated_length": 1084.75, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.9601920384076815, "frac_reward_zero_std": 0.0, "grad_norm": 3.388939095491474, "kl": 0.019195556640625, "learning_rate": 1.0438131293589407e-07, "loss": -0.0256, "num_tokens": 210191849.0, "reward": -7.450580596923828e-09, "reward_std": 0.9905945658683777, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.1619469511753605, "rewards/wordcountpos_reward/raw_geo/std": 0.07616172895843219, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195009, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 1223.5625, "completions/mean_terminated_length": 1223.5625, "completions/min_length": 1038.0, "completions/min_terminated_length": 1038.0, "epoch": 0.9603920784156831, "frac_reward_zero_std": 0.0, "grad_norm": 2.3061872467718816, "kl": 0.0097503662109375, "learning_rate": 1.043376796877027e-07, "loss": -0.0095, "num_tokens": 210232602.0, "reward": 2.60770320892334e-08, "reward_std": 1.0488935708999634, "rewards/wordcountpos_reward/mean": 2.60770320892334e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.021587482965557257, "rewards/wordcountpos_reward/raw_geo/std": 0.06359842173992514, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.05163977794943221, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 1085.5625, "completions/mean_terminated_length": 1085.5625, "completions/min_length": 881.0, "completions/min_terminated_length": 881.0, "epoch": 0.9605921184236847, "frac_reward_zero_std": 0.0, "grad_norm": 3.3465579999165316, "kl": 0.016937255859375, "learning_rate": 1.0429426374648246e-07, "loss": 0.0724, "num_tokens": 210283723.0, "reward": 0.0, "reward_std": 0.780301570892334, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10127982372149447, "rewards/wordcountpos_reward/raw_geo/std": 0.100395398578427, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0806225774829855, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 1022.3125, "completions/mean_terminated_length": 1022.3125, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.9607921584316863, "frac_reward_zero_std": 0.0, "grad_norm": 3.304683443054784, "kl": 0.014892578125, "learning_rate": 1.0425106513340309e-07, "loss": -0.0285, "num_tokens": 210327760.0, "reward": 0.0, "reward_std": 0.8651178479194641, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.011677278784421059, "rewards/wordcountpos_reward/raw_geo/std": 0.09948770725772087, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10852547064066471, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1355.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 1121.0625, "completions/mean_terminated_length": 1121.0625, "completions/min_length": 918.0, "completions/min_terminated_length": 918.0, "epoch": 0.960992198439688, "frac_reward_zero_std": 0.0, "grad_norm": 2.587451821375289, "kl": 0.012969970703125, "learning_rate": 1.042080838695285e-07, "loss": -0.0076, "num_tokens": 210361665.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9488906860351562, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0938418963461225, "rewards/wordcountpos_reward/raw_geo/std": 0.07653284838709166, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1262.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 1073.75, "completions/mean_terminated_length": 1073.75, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.9611922384476895, "frac_reward_zero_std": 0.0, "grad_norm": 2.3329890361092374, "kl": 0.0112457275390625, "learning_rate": 1.041653199758165e-07, "loss": -0.024, "num_tokens": 210399581.0, "reward": 7.450580596923828e-09, "reward_std": 1.0192477703094482, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.0912368161810635, "rewards/wordcountpos_reward/raw_geo/std": 0.08923492885142004, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1462.0, "completions/mean_length": 1322.75, "completions/mean_terminated_length": 1310.933349609375, "completions/min_length": 1138.0, "completions/min_terminated_length": 1138.0, "epoch": 0.9613922784556911, "frac_reward_zero_std": 0.0, "grad_norm": 2.49134080928202, "kl": 0.0136260986328125, "learning_rate": 1.0412277347311898e-07, "loss": -0.0003, "num_tokens": 210452841.0, "reward": 0.0, "reward_std": 0.570915162563324, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1075754721963015, "rewards/wordcountpos_reward/raw_geo/std": 0.3806702423760525, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1242.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 1109.1875, "completions/mean_terminated_length": 1109.1875, "completions/min_length": 998.0, "completions/min_terminated_length": 998.0, "epoch": 0.9615923184636928, "frac_reward_zero_std": 0.0, "grad_norm": 3.0337759426828814, "kl": 0.0148468017578125, "learning_rate": 1.0408044438218181e-07, "loss": -0.0105, "num_tokens": 210487348.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8357559442520142, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.15816994524580363, "rewards/wordcountpos_reward/raw_geo/std": 0.1298721564030794, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04013864859597431, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 1218.5625, "completions/mean_terminated_length": 1218.5625, "completions/min_length": 1011.0, "completions/min_terminated_length": 1011.0, "epoch": 0.9617923584716943, "frac_reward_zero_std": 0.0, "grad_norm": 2.202331849987693, "kl": 0.009857177734375, "learning_rate": 1.0403833272364482e-07, "loss": 0.0237, "num_tokens": 210532957.0, "reward": -7.450580596923828e-09, "reward_std": 1.0077060461044312, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.1383060930595443, "rewards/wordcountpos_reward/raw_geo/std": 0.17181216178644362, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590963, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 903.75, "completions/mean_terminated_length": 903.75, "completions/min_length": 638.0, "completions/min_terminated_length": 638.0, "epoch": 0.961992398479696, "frac_reward_zero_std": 0.0, "grad_norm": 3.452745364163711, "kl": 0.015899658203125, "learning_rate": 1.0399643851804188e-07, "loss": -0.0371, "num_tokens": 210578289.0, "reward": 0.0, "reward_std": 0.4502432644367218, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.10564431400285, "rewards/wordcountpos_reward/raw_geo/std": 0.1262339790838317, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09803627446568496, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1272.0, "completions/mean_length": 1124.375, "completions/mean_terminated_length": 1099.3333740234375, "completions/min_length": 937.0, "completions/min_terminated_length": 937.0, "epoch": 0.9621924384876975, "frac_reward_zero_std": 0.0, "grad_norm": 2.1067988033404994, "kl": 0.010345458984375, "learning_rate": 1.0395476178580072e-07, "loss": 0.0151, "num_tokens": 210624527.0, "reward": -5.960464477539063e-08, "reward_std": 0.9094982743263245, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19585198408442253, "rewards/wordcountpos_reward/raw_geo/std": 0.1303299187770672, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820636, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1208.875, "completions/mean_terminated_length": 1189.4666748046875, "completions/min_length": 1004.0, "completions/min_terminated_length": 1004.0, "epoch": 0.9623924784956991, "frac_reward_zero_std": 0.0, "grad_norm": 3.1712497558183674, "kl": 0.018951416015625, "learning_rate": 1.0391330254724322e-07, "loss": -0.0172, "num_tokens": 210665917.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7790090441703796, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.18845822925645433, "rewards/wordcountpos_reward/raw_geo/std": 0.11207521241872537, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04127594582445935, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1177.75, "completions/mean_terminated_length": 1156.2667236328125, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.9625925185037008, "frac_reward_zero_std": 0.0, "grad_norm": 2.830887075641281, "kl": 0.0150909423828125, "learning_rate": 1.0387206082258501e-07, "loss": -0.0598, "num_tokens": 210713417.0, "reward": 0.0, "reward_std": 0.7916472554206848, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04391039573928953, "rewards/wordcountpos_reward/raw_geo/std": 0.1719207612459232, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.10461569884316811, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1146.25, "completions/mean_terminated_length": 1146.25, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.9627925585117023, "frac_reward_zero_std": 0.0, "grad_norm": 3.165718665457729, "kl": 0.0159454345703125, "learning_rate": 1.0383103663193576e-07, "loss": 0.0332, "num_tokens": 210758925.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9204449653625488, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.041129791314772055, "rewards/wordcountpos_reward/raw_geo/std": 0.2220484932176714, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 1180.5, "completions/mean_terminated_length": 1180.5, "completions/min_length": 1059.0, "completions/min_terminated_length": 1059.0, "epoch": 0.962992598519704, "frac_reward_zero_std": 0.0, "grad_norm": 3.172942751169138, "kl": 0.0151214599609375, "learning_rate": 1.0379022999529905e-07, "loss": 0.0174, "num_tokens": 210804565.0, "reward": 0.0, "reward_std": 0.605470597743988, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.12523924267396574, "rewards/wordcountpos_reward/raw_geo/std": 0.14209509785624144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1361.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1209.4375, "completions/mean_terminated_length": 1209.4375, "completions/min_length": 978.0, "completions/min_terminated_length": 978.0, "epoch": 0.9631926385277055, "frac_reward_zero_std": 0.0, "grad_norm": 2.960530145090134, "kl": 0.0184783935546875, "learning_rate": 1.0374964093257245e-07, "loss": -0.0059, "num_tokens": 210850924.0, "reward": 0.0, "reward_std": 0.5841987729072571, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06578777413687394, "rewards/wordcountpos_reward/raw_geo/std": 0.20873976584283377, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07302967433402213, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1194.6875, "completions/mean_terminated_length": 1151.071533203125, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.9633926785357071, "frac_reward_zero_std": 0.0, "grad_norm": 3.143624851717267, "kl": 0.0147247314453125, "learning_rate": 1.037092694635473e-07, "loss": 0.006, "num_tokens": 210888855.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6089115142822266, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11718801414072365, "rewards/wordcountpos_reward/raw_geo/std": 0.1357606049805931, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1137.6875, "completions/mean_terminated_length": 1113.533447265625, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.9635927185437088, "frac_reward_zero_std": 0.0, "grad_norm": 3.447501662045999, "kl": 0.0162811279296875, "learning_rate": 1.0366911560790884e-07, "loss": 0.0383, "num_tokens": 210938882.0, "reward": 0.0, "reward_std": 0.9192066192626953, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 6.949811485065967e-05, "rewards/wordcountpos_reward/raw_geo/std": 0.07942718631566348, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.1134476547592341, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1290.6875, "completions/mean_terminated_length": 1195.5455322265625, "completions/min_length": 786.0, "completions/min_terminated_length": 786.0, "epoch": 0.9637927585517103, "frac_reward_zero_std": 0.0, "grad_norm": 2.4801086935771575, "kl": 0.012054443359375, "learning_rate": 1.0362917938523647e-07, "loss": 0.0182, "num_tokens": 210994325.0, "reward": -1.4901161193847656e-08, "reward_std": 1.003495454788208, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.21956128266795538, "rewards/wordcountpos_reward/raw_geo/std": 0.15706072592573772, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.07290277645477446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1230.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 1041.75, "completions/mean_terminated_length": 1041.75, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.963992798559712, "frac_reward_zero_std": 0.0, "grad_norm": 3.16129465468248, "kl": 0.0171051025390625, "learning_rate": 1.0358946081500309e-07, "loss": -0.0138, "num_tokens": 211044889.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0187177658081055, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004286540697749387, "rewards/wordcountpos_reward/raw_geo/std": 0.06663450298553149, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593314, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1162.0, "completions/mean_terminated_length": 1162.0, "completions/min_length": 912.0, "completions/min_terminated_length": 912.0, "epoch": 0.9641928385677135, "frac_reward_zero_std": 0.0, "grad_norm": 3.4808701966077953, "kl": 0.020721435546875, "learning_rate": 1.0354995991657575e-07, "loss": -0.019, "num_tokens": 211090617.0, "reward": -1.4901161193847656e-08, "reward_std": 1.041210412979126, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.03064878701034332, "rewards/wordcountpos_reward/raw_geo/std": 0.18485823466132542, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05146016078626404, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 1213.25, "completions/mean_terminated_length": 1172.2857666015625, "completions/min_length": 824.0, "completions/min_terminated_length": 824.0, "epoch": 0.9643928785757151, "frac_reward_zero_std": 0.0, "grad_norm": 3.0682254268382887, "kl": 0.020416259765625, "learning_rate": 1.0351067670921523e-07, "loss": -0.0193, "num_tokens": 211138709.0, "reward": 4.470348358154297e-08, "reward_std": 1.0577739477157593, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.009272507975254567, "rewards/wordcountpos_reward/raw_geo/std": 0.09447567465996491, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08073734277593311, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 1004.875, "completions/mean_terminated_length": 1004.875, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.9645929185837168, "frac_reward_zero_std": 0.0, "grad_norm": 3.4591615307689367, "kl": 0.0184326171875, "learning_rate": 1.0347161121207616e-07, "loss": -0.0266, "num_tokens": 211176475.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9155535101890564, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.12919461511624547, "rewards/wordcountpos_reward/raw_geo/std": 0.14160011928383495, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1054.0625, "completions/mean_terminated_length": 1054.0625, "completions/min_length": 767.0, "completions/min_terminated_length": 767.0, "epoch": 0.9647929585917183, "frac_reward_zero_std": 0.0, "grad_norm": 3.1509539467662147, "kl": 0.0172119140625, "learning_rate": 1.0343276344420721e-07, "loss": 0.0008, "num_tokens": 211227732.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5473648309707642, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19384365396902722, "rewards/wordcountpos_reward/raw_geo/std": 0.17003429280950824, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.08027729719194864, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1463.0, "completions/mean_length": 1176.25, "completions/mean_terminated_length": 1154.666748046875, "completions/min_length": 832.0, "completions/min_terminated_length": 832.0, "epoch": 0.96499299859972, "frac_reward_zero_std": 0.0, "grad_norm": 2.90382859904104, "kl": 0.014862060546875, "learning_rate": 1.0339413342455055e-07, "loss": -0.0108, "num_tokens": 211274864.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8729248046875, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04459375122073113, "rewards/wordcountpos_reward/raw_geo/std": 0.094328979836304, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06871842709362767, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1406.6875, "completions/mean_terminated_length": 1350.7000732421875, "completions/min_length": 1163.0, "completions/min_terminated_length": 1163.0, "epoch": 0.9651930386077215, "frac_reward_zero_std": 0.0, "grad_norm": 2.566998156167939, "kl": 0.014739990234375, "learning_rate": 1.0335572117194247e-07, "loss": -0.0067, "num_tokens": 211318003.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9195196628570557, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1822304606232477, "rewards/wordcountpos_reward/raw_geo/std": 0.21441439286741412, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1104.25, "completions/mean_terminated_length": 1047.71435546875, "completions/min_length": 739.0, "completions/min_terminated_length": 739.0, "epoch": 0.9653930786157231, "frac_reward_zero_std": 0.0, "grad_norm": 3.528729201886777, "kl": 0.01513671875, "learning_rate": 1.0331752670511287e-07, "loss": 0.0051, "num_tokens": 211370751.0, "reward": 0.0, "reward_std": 0.9696059226989746, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.23458977168731462, "rewards/wordcountpos_reward/raw_geo/std": 0.11595825933364537, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1135.3125, "completions/mean_terminated_length": 1111.0, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.9655931186237248, "frac_reward_zero_std": 0.0, "grad_norm": 2.904236946109252, "kl": 0.0177001953125, "learning_rate": 1.032795500426856e-07, "loss": 0.0265, "num_tokens": 211414740.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8867334127426147, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01825390811644253, "rewards/wordcountpos_reward/raw_geo/std": 0.18346256433488542, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1114.0625, "completions/mean_terminated_length": 1114.0625, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.9657931586317263, "frac_reward_zero_std": 0.0, "grad_norm": 3.583265300381446, "kl": 0.0168914794921875, "learning_rate": 1.0324179120317822e-07, "loss": 0.0012, "num_tokens": 211468845.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9911655187606812, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.25643380017050793, "rewards/wordcountpos_reward/raw_geo/std": 0.05545476631603586, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237131, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 1214.25, "completions/mean_terminated_length": 1173.4285888671875, "completions/min_length": 999.0, "completions/min_terminated_length": 999.0, "epoch": 0.965993198639728, "frac_reward_zero_std": 0.0, "grad_norm": 3.2042938936179337, "kl": 0.0176849365234375, "learning_rate": 1.0320425020500223e-07, "loss": 0.0093, "num_tokens": 211513465.0, "reward": 0.0, "reward_std": 0.5937833786010742, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.13462508168059778, "rewards/wordcountpos_reward/raw_geo/std": 0.14768030419810965, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1222.25, "completions/mean_terminated_length": 1182.571533203125, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.9661932386477295, "frac_reward_zero_std": 0.0, "grad_norm": 2.997891177374238, "kl": 0.0196533203125, "learning_rate": 1.031669270664626e-07, "loss": -0.0104, "num_tokens": 211557645.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8115700483322144, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08869683846842091, "rewards/wordcountpos_reward/raw_geo/std": 0.07528155427564674, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238704, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1210.125, "completions/mean_terminated_length": 1168.71435546875, "completions/min_length": 801.0, "completions/min_terminated_length": 801.0, "epoch": 0.9663932786557311, "frac_reward_zero_std": 0.0, "grad_norm": 3.0089304955935994, "kl": 0.0154571533203125, "learning_rate": 1.0312982180575847e-07, "loss": -0.0009, "num_tokens": 211608719.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9866645336151123, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04164541514465623, "rewards/wordcountpos_reward/raw_geo/std": 0.06902949367992099, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1423.0, "completions/mean_length": 1207.25, "completions/mean_terminated_length": 1165.4285888671875, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 0.9665933186637328, "frac_reward_zero_std": 0.0, "grad_norm": 2.9023866595888106, "kl": 0.0140380859375, "learning_rate": 1.0309293444098231e-07, "loss": -0.0088, "num_tokens": 211664579.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7301797866821289, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09319971988400778, "rewards/wordcountpos_reward/raw_geo/std": 0.12247176902875852, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06666666666666665, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 1114.1875, "completions/mean_terminated_length": 1114.1875, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.9667933586717343, "frac_reward_zero_std": 0.0, "grad_norm": 3.4689666687722838, "kl": 0.0153656005859375, "learning_rate": 1.0305626499012074e-07, "loss": 0.0082, "num_tokens": 211700294.0, "reward": -7.450580596923828e-09, "reward_std": 0.9945249557495117, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.01717344044459398, "rewards/wordcountpos_reward/raw_geo/std": 0.1151733146120633, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115677, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1340.8125, "completions/mean_terminated_length": 1245.300048828125, "completions/min_length": 895.0, "completions/min_terminated_length": 895.0, "epoch": 0.966993398679736, "frac_reward_zero_std": 0.0, "grad_norm": 3.2563723534389064, "kl": 0.022918701171875, "learning_rate": 1.0301981347105386e-07, "loss": -0.0135, "num_tokens": 211754043.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9433789253234863, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.061529006114210257, "rewards/wordcountpos_reward/raw_geo/std": 0.05575184096531753, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845024, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1017.875, "completions/mean_terminated_length": 1017.875, "completions/min_length": 782.0, "completions/min_terminated_length": 782.0, "epoch": 0.9671934386877376, "frac_reward_zero_std": 0.0, "grad_norm": 2.878961403380464, "kl": 0.014862060546875, "learning_rate": 1.0298357990155564e-07, "loss": -0.0675, "num_tokens": 211789929.0, "reward": 1.4901161193847656e-08, "reward_std": 0.918804407119751, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07874955771304294, "rewards/wordcountpos_reward/raw_geo/std": 0.16776529755758962, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1198.75, "completions/mean_terminated_length": 1198.75, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.9673934786957391, "frac_reward_zero_std": 0.0, "grad_norm": 3.1526056729687806, "kl": 0.0198516845703125, "learning_rate": 1.0294756429929366e-07, "loss": -0.0401, "num_tokens": 211838549.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0421116352081299, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07303770978708417, "rewards/wordcountpos_reward/raw_geo/std": 0.06960553824923933, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06426219440409445, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1273.0, "completions/mean_length": 1085.375, "completions/mean_terminated_length": 1057.7333984375, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.9675935187037408, "frac_reward_zero_std": 0.0, "grad_norm": 3.080910001012228, "kl": 0.01434326171875, "learning_rate": 1.0291176668182947e-07, "loss": 0.0124, "num_tokens": 211882067.0, "reward": 0.0, "reward_std": 0.8622680306434631, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.10185765721195514, "rewards/wordcountpos_reward/raw_geo/std": 0.08396284826779549, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.022771001702132435, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1226.625, "completions/mean_terminated_length": 1187.571533203125, "completions/min_length": 952.0, "completions/min_terminated_length": 952.0, "epoch": 0.9677935587117423, "frac_reward_zero_std": 0.0, "grad_norm": 2.4833143578669605, "kl": 0.011932373046875, "learning_rate": 1.0287618706661795e-07, "loss": 0.0128, "num_tokens": 211917197.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0105336904525757, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0164983197127166, "rewards/wordcountpos_reward/raw_geo/std": 0.05593677710933872, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1235.125, "completions/mean_terminated_length": 1235.125, "completions/min_length": 964.0, "completions/min_terminated_length": 964.0, "epoch": 0.967993598719744, "frac_reward_zero_std": 0.0, "grad_norm": 3.535880750731489, "kl": 0.02032470703125, "learning_rate": 1.0284082547100797e-07, "loss": 0.0259, "num_tokens": 211961183.0, "reward": 0.0, "reward_std": 0.9192782044410706, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.29964538455240586, "rewards/wordcountpos_reward/raw_geo/std": 0.12128878842730077, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.09699179041242309, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 1025.5625, "completions/mean_terminated_length": 1025.5625, "completions/min_length": 774.0, "completions/min_terminated_length": 774.0, "epoch": 0.9681936387277456, "frac_reward_zero_std": 0.0, "grad_norm": 3.4816402419758794, "kl": 0.015350341796875, "learning_rate": 1.0280568191224201e-07, "loss": 0.0163, "num_tokens": 212003192.0, "reward": 0.0, "reward_std": 0.808796763420105, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08679040633916825, "rewards/wordcountpos_reward/raw_geo/std": 0.20045482386278915, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042255, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 1059.625, "completions/mean_terminated_length": 1059.625, "completions/min_length": 908.0, "completions/min_terminated_length": 908.0, "epoch": 0.9683936787357471, "frac_reward_zero_std": 0.0, "grad_norm": 2.508881198892353, "kl": 0.00949859619140625, "learning_rate": 1.0277075640745624e-07, "loss": 0.056, "num_tokens": 212049882.0, "reward": 0.0, "reward_std": 1.007016658782959, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07814444274650706, "rewards/wordcountpos_reward/raw_geo/std": 0.0781283583559446, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08944271909999159, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1197.125, "completions/mean_terminated_length": 1176.933349609375, "completions/min_length": 606.0, "completions/min_terminated_length": 606.0, "epoch": 0.9685937187437488, "frac_reward_zero_std": 0.0, "grad_norm": 3.3614370481219056, "kl": 0.01861572265625, "learning_rate": 1.027360489736804e-07, "loss": -0.0285, "num_tokens": 212103716.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0015990734100342, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06276889109089764, "rewards/wordcountpos_reward/raw_geo/std": 0.16293372606811818, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.12292725943057185, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1004.5625, "completions/mean_terminated_length": 1004.5625, "completions/min_length": 656.0, "completions/min_terminated_length": 656.0, "epoch": 0.9687937587517503, "frac_reward_zero_std": 0.0, "grad_norm": 3.697351943497929, "kl": 0.017913818359375, "learning_rate": 1.0270155962783814e-07, "loss": -0.0235, "num_tokens": 212139613.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8455219268798828, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09683942182548617, "rewards/wordcountpos_reward/raw_geo/std": 0.19023560445555418, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 1009.3125, "completions/mean_terminated_length": 976.6000366210938, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.968993798759752, "frac_reward_zero_std": 0.0, "grad_norm": 3.602021222148386, "kl": 0.02239990234375, "learning_rate": 1.0266728838674651e-07, "loss": -0.0261, "num_tokens": 212188026.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9905619621276855, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026660433240490083, "rewards/wordcountpos_reward/raw_geo/std": 0.09016063842291133, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.7625, "rewards/wordcountpos_reward/raw_rule/std": 0.1287403358472941, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 1113.3125, "completions/mean_terminated_length": 1113.3125, "completions/min_length": 847.0, "completions/min_terminated_length": 847.0, "epoch": 0.9691938387677536, "frac_reward_zero_std": 0.0, "grad_norm": 3.2539753264507594, "kl": 0.0152740478515625, "learning_rate": 1.0263323526711636e-07, "loss": -0.0685, "num_tokens": 212231799.0, "reward": -2.9802322387695312e-08, "reward_std": 0.80415940284729, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05811826173331928, "rewards/wordcountpos_reward/raw_geo/std": 0.07018702013106233, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.13871099718746435, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 1250.375, "completions/mean_terminated_length": 1214.71435546875, "completions/min_length": 1029.0, "completions/min_terminated_length": 1029.0, "epoch": 0.9693938787757551, "frac_reward_zero_std": 0.0, "grad_norm": 3.0226277126618584, "kl": 0.014404296875, "learning_rate": 1.025994002855521e-07, "loss": -0.0095, "num_tokens": 212270517.0, "reward": -3.725290298461914e-09, "reward_std": 1.0443062782287598, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.021432033759615086, "rewards/wordcountpos_reward/raw_geo/std": 0.056327839187417815, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.029502040105226113, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1311.25, "completions/mean_terminated_length": 1164.4444580078125, "completions/min_length": 920.0, "completions/min_terminated_length": 920.0, "epoch": 0.9695939187837568, "frac_reward_zero_std": 0.0, "grad_norm": 3.0148205563232278, "kl": 0.0151519775390625, "learning_rate": 1.0256578345855191e-07, "loss": -0.0307, "num_tokens": 212325129.0, "reward": 5.960464477539063e-08, "reward_std": 0.5632712841033936, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08026373517835339, "rewards/wordcountpos_reward/raw_geo/std": 0.10535123710894564, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8708333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1270.25, "completions/mean_terminated_length": 1193.666748046875, "completions/min_length": 842.0, "completions/min_terminated_length": 842.0, "epoch": 0.9697939587917583, "frac_reward_zero_std": 0.0, "grad_norm": 2.3215897591476384, "kl": 0.0106964111328125, "learning_rate": 1.0253238480250747e-07, "loss": 0.0209, "num_tokens": 212366941.0, "reward": -1.4901161193847656e-08, "reward_std": 1.024052381515503, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07171738164339062, "rewards/wordcountpos_reward/raw_geo/std": 0.1610672900944151, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1166.0625, "completions/mean_terminated_length": 1118.357177734375, "completions/min_length": 875.0, "completions/min_terminated_length": 875.0, "epoch": 0.96999399879976, "frac_reward_zero_std": 0.0, "grad_norm": 3.4800202636422277, "kl": 0.01849365234375, "learning_rate": 1.024992043337041e-07, "loss": -0.0111, "num_tokens": 212401982.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6432831287384033, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10154853723866966, "rewards/wordcountpos_reward/raw_geo/std": 0.14011905115945375, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.975, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1171.5, "completions/mean_terminated_length": 1095.6923828125, "completions/min_length": 825.0, "completions/min_terminated_length": 825.0, "epoch": 0.9701940388077616, "frac_reward_zero_std": 0.0, "grad_norm": 3.201077355670034, "kl": 0.02142333984375, "learning_rate": 1.0246624206832075e-07, "loss": -0.0192, "num_tokens": 212455430.0, "reward": -7.450580596923828e-09, "reward_std": 1.0189968347549438, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.09777583938858979, "rewards/wordcountpos_reward/raw_geo/std": 0.13898165484954592, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081411, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 1064.6875, "completions/mean_terminated_length": 1064.6875, "completions/min_length": 771.0, "completions/min_terminated_length": 771.0, "epoch": 0.9703940788157631, "frac_reward_zero_std": 0.0, "grad_norm": 3.329544008375855, "kl": 0.017486572265625, "learning_rate": 1.0243349802242996e-07, "loss": -0.0168, "num_tokens": 212498217.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8190473318099976, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10661610838292127, "rewards/wordcountpos_reward/raw_geo/std": 0.16692366367228836, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1469.0, "completions/mean_length": 1216.0625, "completions/mean_terminated_length": 1197.1334228515625, "completions/min_length": 919.0, "completions/min_terminated_length": 919.0, "epoch": 0.9705941188237648, "frac_reward_zero_std": 0.0, "grad_norm": 2.386179456417828, "kl": 0.011871337890625, "learning_rate": 1.0240097221199794e-07, "loss": -0.0954, "num_tokens": 212551154.0, "reward": 1.4901161193847656e-08, "reward_std": 0.8877211809158325, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04742460494033403, "rewards/wordcountpos_reward/raw_geo/std": 0.09887723951108807, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.13526380260918403, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1261.6875, "completions/mean_terminated_length": 1227.6429443359375, "completions/min_length": 1075.0, "completions/min_terminated_length": 1075.0, "epoch": 0.9707941588317663, "frac_reward_zero_std": 0.0, "grad_norm": 3.216732802904583, "kl": 0.01904296875, "learning_rate": 1.0236866465288431e-07, "loss": -0.0242, "num_tokens": 212592261.0, "reward": -4.470348358154297e-08, "reward_std": 1.0307393074035645, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05936426634419038, "rewards/wordcountpos_reward/raw_geo/std": 0.08793519564359625, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1153.8125, "completions/mean_terminated_length": 1104.357177734375, "completions/min_length": 870.0, "completions/min_terminated_length": 870.0, "epoch": 0.970994198839768, "frac_reward_zero_std": 0.0, "grad_norm": 3.144185222648956, "kl": 0.0160980224609375, "learning_rate": 1.0233657536084249e-07, "loss": -0.0075, "num_tokens": 212638210.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0460028648376465, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.09224516615398223, "rewards/wordcountpos_reward/raw_geo/std": 0.14585251750724787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 939.1875, "completions/mean_terminated_length": 939.1875, "completions/min_length": 800.0, "completions/min_terminated_length": 800.0, "epoch": 0.9711942388477696, "frac_reward_zero_std": 0.0, "grad_norm": 3.6359976204305147, "kl": 0.0159149169921875, "learning_rate": 1.0230470435151932e-07, "loss": -0.0194, "num_tokens": 212684421.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9558795690536499, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.09788804003449071, "rewards/wordcountpos_reward/raw_geo/std": 0.061232517710867344, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08388704928078611, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 1041.625, "completions/mean_terminated_length": 1011.0667114257812, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.9713942788557711, "frac_reward_zero_std": 0.0, "grad_norm": 3.5894622371728255, "kl": 0.01910400390625, "learning_rate": 1.0227305164045527e-07, "loss": 0.0066, "num_tokens": 212727199.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0627164840698242, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.24273885446250545, "rewards/wordcountpos_reward/raw_geo/std": 0.11302049508697319, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06652763279965644, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1277.0, "completions/max_terminated_length": 1277.0, "completions/mean_length": 1029.625, "completions/mean_terminated_length": 1029.625, "completions/min_length": 838.0, "completions/min_terminated_length": 838.0, "epoch": 0.9715943188637728, "frac_reward_zero_std": 0.0, "grad_norm": 3.146288182066384, "kl": 0.018402099609375, "learning_rate": 1.0224161724308424e-07, "loss": 0.0128, "num_tokens": 212762961.0, "reward": 0.0, "reward_std": 1.005742073059082, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.16031561304102437, "rewards/wordcountpos_reward/raw_geo/std": 0.08255229261164576, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.056927504255331086, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1153.75, "completions/mean_terminated_length": 996.3636474609375, "completions/min_length": 667.0, "completions/min_terminated_length": 667.0, "epoch": 0.9717943588717743, "frac_reward_zero_std": 0.0, "grad_norm": 3.2008786086681233, "kl": 0.01739501953125, "learning_rate": 1.0221040117473392e-07, "loss": -0.0598, "num_tokens": 212805885.0, "reward": -5.587935447692871e-08, "reward_std": 1.0567270517349243, "rewards/wordcountpos_reward/mean": -5.587935447692871e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.013869079076950225, "rewards/wordcountpos_reward/raw_geo/std": 0.12808216606972858, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08243965245133131, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 993.6875, "completions/mean_terminated_length": 993.6875, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.9719943988797759, "frac_reward_zero_std": 0.0, "grad_norm": 3.5516716157357906, "kl": 0.0191650390625, "learning_rate": 1.0217940345062539e-07, "loss": 0.0072, "num_tokens": 212850344.0, "reward": 0.0, "reward_std": 0.8519189357757568, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.022917527081769973, "rewards/wordcountpos_reward/raw_geo/std": 0.1986690336161579, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.14240006242195885, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1270.5, "completions/mean_terminated_length": 1270.5, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.9721944388877776, "frac_reward_zero_std": 0.0, "grad_norm": 2.2138897424627695, "kl": 0.011383056640625, "learning_rate": 1.0214862408587322e-07, "loss": 0.0211, "num_tokens": 212899600.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9955963492393494, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.17352606911192026, "rewards/wordcountpos_reward/raw_geo/std": 0.11430199961942443, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 1277.375, "completions/mean_terminated_length": 1226.0, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.9723944788957791, "frac_reward_zero_std": 0.0, "grad_norm": 3.1703738714244825, "kl": 0.01751708984375, "learning_rate": 1.0211806309548557e-07, "loss": -0.0081, "num_tokens": 212950622.0, "reward": 0.0, "reward_std": 0.5700256824493408, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.29172239096687913, "rewards/wordcountpos_reward/raw_geo/std": 0.19705197599253566, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06763190130459201, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 1216.375, "completions/mean_terminated_length": 1216.375, "completions/min_length": 958.0, "completions/min_terminated_length": 958.0, "epoch": 0.9725945189037808, "frac_reward_zero_std": 0.0, "grad_norm": 3.512313632339308, "kl": 0.020294189453125, "learning_rate": 1.020877204943642e-07, "loss": -0.0297, "num_tokens": 213007340.0, "reward": 0.0, "reward_std": 0.7116823196411133, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07148148639366883, "rewards/wordcountpos_reward/raw_geo/std": 0.10237743457173876, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1141.875, "completions/mean_terminated_length": 1141.875, "completions/min_length": 898.0, "completions/min_terminated_length": 898.0, "epoch": 0.9727945589117823, "frac_reward_zero_std": 0.0, "grad_norm": 3.226519606962069, "kl": 0.017669677734375, "learning_rate": 1.0205759629730417e-07, "loss": -0.0093, "num_tokens": 213047234.0, "reward": 0.0, "reward_std": 0.5187197923660278, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14467401165247715, "rewards/wordcountpos_reward/raw_geo/std": 0.05427751677281459, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1200.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 1089.5625, "completions/mean_terminated_length": 1089.5625, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.9729945989197839, "frac_reward_zero_std": 0.0, "grad_norm": 2.9433599734065576, "kl": 0.01363372802734375, "learning_rate": 1.0202769051899432e-07, "loss": -0.0286, "num_tokens": 213088123.0, "reward": 0.0, "reward_std": 0.6978564858436584, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.0027536901486962303, "rewards/wordcountpos_reward/raw_geo/std": 0.04683874899604421, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1415.0, "completions/mean_length": 1204.1875, "completions/mean_terminated_length": 1184.4666748046875, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.9731946389277856, "frac_reward_zero_std": 0.0, "grad_norm": 3.315443249924069, "kl": 0.018280029296875, "learning_rate": 1.0199800317401675e-07, "loss": 0.0532, "num_tokens": 213136750.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7330182790756226, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16235740451701142, "rewards/wordcountpos_reward/raw_geo/std": 0.06032660000996276, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 1410.4375, "completions/mean_terminated_length": 1340.77783203125, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.9733946789357871, "frac_reward_zero_std": 0.0, "grad_norm": 3.1720798135039754, "kl": 0.017120361328125, "learning_rate": 1.0196853427684717e-07, "loss": 0.0097, "num_tokens": 213194101.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9027075171470642, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.05275682368160602, "rewards/wordcountpos_reward/raw_geo/std": 0.06526286363270349, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08333333333333336, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 1181.9375, "completions/mean_terminated_length": 1181.9375, "completions/min_length": 1052.0, "completions/min_terminated_length": 1052.0, "epoch": 0.9735947189437888, "frac_reward_zero_std": 0.0, "grad_norm": 2.2231501279449555, "kl": 0.00962066650390625, "learning_rate": 1.0193928384185474e-07, "loss": 0.0174, "num_tokens": 213227100.0, "reward": 0.0, "reward_std": 0.6750542521476746, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.000980452443711421, "rewards/wordcountpos_reward/raw_geo/std": 0.07053277222367686, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03626037527129048, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1236.5625, "completions/mean_terminated_length": 1148.75, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.9737947589517904, "frac_reward_zero_std": 0.0, "grad_norm": 2.9815933978759763, "kl": 0.016876220703125, "learning_rate": 1.0191025188330209e-07, "loss": 0.0143, "num_tokens": 213262341.0, "reward": 1.862645149230957e-08, "reward_std": 1.0146101713180542, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0649120994654197, "rewards/wordcountpos_reward/raw_geo/std": 0.0568658684641448, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.06440611887195306, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 1115.25, "completions/mean_terminated_length": 1089.60009765625, "completions/min_length": 813.0, "completions/min_terminated_length": 813.0, "epoch": 0.9739947989597919, "frac_reward_zero_std": 0.0, "grad_norm": 2.0968412648541035, "kl": 0.0133819580078125, "learning_rate": 1.0188143841534536e-07, "loss": -0.0029, "num_tokens": 213303361.0, "reward": 1.862645149230957e-08, "reward_std": 1.0598949193954468, "rewards/wordcountpos_reward/mean": 1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07130166366623708, "rewards/wordcountpos_reward/raw_geo/std": 0.04428963475010398, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.062063289083417524, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1171.5, "completions/mean_terminated_length": 1171.5, "completions/min_length": 988.0, "completions/min_terminated_length": 988.0, "epoch": 0.9741948389677936, "frac_reward_zero_std": 0.0, "grad_norm": 2.901858357847452, "kl": 0.01629638671875, "learning_rate": 1.0185284345203416e-07, "loss": -0.0302, "num_tokens": 213344369.0, "reward": -5.960464477539063e-08, "reward_std": 0.8074215054512024, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14318504801795565, "rewards/wordcountpos_reward/raw_geo/std": 0.11311080199806257, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 1036.875, "completions/mean_terminated_length": 1036.875, "completions/min_length": 910.0, "completions/min_terminated_length": 910.0, "epoch": 0.9743948789757951, "frac_reward_zero_std": 0.0, "grad_norm": 3.1666219804109206, "kl": 0.018890380859375, "learning_rate": 1.0182446700731143e-07, "loss": 0.0126, "num_tokens": 213384487.0, "reward": 0.0, "reward_std": 0.8249167203903198, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0474237282233176, "rewards/wordcountpos_reward/raw_geo/std": 0.056583168754648756, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06978803887752091, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1208.0, "completions/max_terminated_length": 1208.0, "completions/mean_length": 918.875, "completions/mean_terminated_length": 918.875, "completions/min_length": 631.0, "completions/min_terminated_length": 631.0, "epoch": 0.9745949189837968, "frac_reward_zero_std": 0.0, "grad_norm": 2.0447408637766427, "kl": 0.0104217529296875, "learning_rate": 1.0179630909501371e-07, "loss": -0.0184, "num_tokens": 213423333.0, "reward": 2.9802322387695312e-08, "reward_std": 0.819499135017395, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.13585797046585266, "rewards/wordcountpos_reward/raw_geo/std": 0.28176659077129185, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1342.3125, "completions/mean_terminated_length": 1270.6363525390625, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.9747949589917984, "frac_reward_zero_std": 0.0, "grad_norm": 2.916103549607408, "kl": 0.0162200927734375, "learning_rate": 1.0176836972887095e-07, "loss": 0.0002, "num_tokens": 213475930.0, "reward": 0.0, "reward_std": 0.7701718807220459, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0005546468960054429, "rewards/wordcountpos_reward/raw_geo/std": 0.05314152591332019, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11021863793455329, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 1143.5625, "completions/mean_terminated_length": 1143.5625, "completions/min_length": 716.0, "completions/min_terminated_length": 716.0, "epoch": 0.9749949989997999, "frac_reward_zero_std": 0.0, "grad_norm": 3.43351074114993, "kl": 0.0189361572265625, "learning_rate": 1.0174064892250654e-07, "loss": 0.0194, "num_tokens": 213525579.0, "reward": -2.9802322387695312e-08, "reward_std": 0.750813364982605, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11755766552528414, "rewards/wordcountpos_reward/raw_geo/std": 0.0912779674175137, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.13957607775504186, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1302.875, "completions/mean_terminated_length": 1274.71435546875, "completions/min_length": 1038.0, "completions/min_terminated_length": 1038.0, "epoch": 0.9751950390078016, "frac_reward_zero_std": 0.0, "grad_norm": 2.9611964398229746, "kl": 0.0133819580078125, "learning_rate": 1.0171314668943713e-07, "loss": 0.0424, "num_tokens": 213572865.0, "reward": 0.0, "reward_std": 0.759361743927002, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09480174693004585, "rewards/wordcountpos_reward/raw_geo/std": 0.07573521590676023, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857661, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 1268.375, "completions/mean_terminated_length": 1129.4000244140625, "completions/min_length": 892.0, "completions/min_terminated_length": 892.0, "epoch": 0.9753950790158031, "frac_reward_zero_std": 0.0, "grad_norm": 2.265992890114196, "kl": 0.0132904052734375, "learning_rate": 1.0168586304307306e-07, "loss": -0.0347, "num_tokens": 213621287.0, "reward": 0.0, "reward_std": 0.8430043458938599, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11391079424861292, "rewards/wordcountpos_reward/raw_geo/std": 0.13968632177654064, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.08255189164891871, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 1115.6875, "completions/mean_terminated_length": 1090.0667724609375, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.9755951190238048, "frac_reward_zero_std": 0.0, "grad_norm": 3.6662332884089883, "kl": 0.0158843994140625, "learning_rate": 1.0165879799671793e-07, "loss": -0.0587, "num_tokens": 213663082.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6932845711708069, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.20685622822617292, "rewards/wordcountpos_reward/raw_geo/std": 0.24846810084237342, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941137, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1361.0, "completions/mean_length": 1192.5, "completions/mean_terminated_length": 1172.0001220703125, "completions/min_length": 959.0, "completions/min_terminated_length": 959.0, "epoch": 0.9757951590318064, "frac_reward_zero_std": 0.0, "grad_norm": 3.5158753836451866, "kl": 0.019500732421875, "learning_rate": 1.0163195156356878e-07, "loss": -0.008, "num_tokens": 213711618.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9934870600700378, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.035081784662058565, "rewards/wordcountpos_reward/raw_geo/std": 0.10525381388102341, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1170.4375, "completions/mean_terminated_length": 1123.357177734375, "completions/min_length": 815.0, "completions/min_terminated_length": 815.0, "epoch": 0.9759951990398079, "frac_reward_zero_std": 0.0, "grad_norm": 2.9971651943509134, "kl": 0.0156402587890625, "learning_rate": 1.0160532375671602e-07, "loss": 0.0394, "num_tokens": 213762177.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0490314960479736, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0593000796862395, "rewards/wordcountpos_reward/raw_geo/std": 0.04691912853094349, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1055.0, "completions/mean_terminated_length": 1055.0, "completions/min_length": 871.0, "completions/min_terminated_length": 871.0, "epoch": 0.9761952390478096, "frac_reward_zero_std": 0.0, "grad_norm": 3.323507972926742, "kl": 0.0155792236328125, "learning_rate": 1.0157891458914351e-07, "loss": 0.0334, "num_tokens": 213807665.0, "reward": 0.0, "reward_std": 0.8608255386352539, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.09736468939539043, "rewards/wordcountpos_reward/raw_geo/std": 0.10509346255741975, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04216370213557838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 998.625, "completions/mean_terminated_length": 998.625, "completions/min_length": 717.0, "completions/min_terminated_length": 717.0, "epoch": 0.9763952790558111, "frac_reward_zero_std": 0.0, "grad_norm": 2.8281797324930835, "kl": 0.013397216796875, "learning_rate": 1.015527240737285e-07, "loss": -0.0318, "num_tokens": 213840811.0, "reward": 0.0, "reward_std": 0.6499125361442566, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.07812971965258009, "rewards/wordcountpos_reward/raw_geo/std": 0.13733462678576555, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 1096.8125, "completions/mean_terminated_length": 1096.8125, "completions/min_length": 896.0, "completions/min_terminated_length": 896.0, "epoch": 0.9765953190638128, "frac_reward_zero_std": 0.0, "grad_norm": 2.816011666346549, "kl": 0.0121917724609375, "learning_rate": 1.0152675222324156e-07, "loss": 0.0473, "num_tokens": 213883576.0, "reward": 4.470348358154297e-08, "reward_std": 1.0307096242904663, "rewards/wordcountpos_reward/mean": 4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.11024265113955398, "rewards/wordcountpos_reward/raw_geo/std": 0.08614365476029412, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.09269623828717428, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1282.0, "completions/max_terminated_length": 1282.0, "completions/mean_length": 1061.875, "completions/mean_terminated_length": 1061.875, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.9767953590718144, "frac_reward_zero_std": 0.0, "grad_norm": 3.6243555180282145, "kl": 0.021697998046875, "learning_rate": 1.0150099905034672e-07, "loss": 0.0302, "num_tokens": 213921686.0, "reward": -1.862645149230957e-08, "reward_std": 1.058918833732605, "rewards/wordcountpos_reward/mean": -1.862645149230957e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0043055237565849756, "rewards/wordcountpos_reward/raw_geo/std": 0.08246938162265248, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8125, "rewards/wordcountpos_reward/raw_rule/std": 0.1792473978322409, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1001.375, "completions/mean_terminated_length": 1001.375, "completions/min_length": 702.0, "completions/min_terminated_length": 702.0, "epoch": 0.9769953990798159, "frac_reward_zero_std": 0.0, "grad_norm": 3.161197545661253, "kl": 0.0139923095703125, "learning_rate": 1.0147546456760136e-07, "loss": -0.0162, "num_tokens": 213955908.0, "reward": -5.960464477539063e-08, "reward_std": 0.8322240114212036, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.21344260465902215, "rewards/wordcountpos_reward/raw_geo/std": 0.1585867206650566, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1489.0, "completions/mean_length": 1233.75, "completions/mean_terminated_length": 1195.71435546875, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.9771954390878176, "frac_reward_zero_std": 0.0, "grad_norm": 2.9032625640288257, "kl": 0.0167999267578125, "learning_rate": 1.0145014878745622e-07, "loss": 0.0052, "num_tokens": 213993848.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9779922366142273, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.14736926437085227, "rewards/wordcountpos_reward/raw_geo/std": 0.13253307071713583, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.057089922571845, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1051.5, "completions/mean_terminated_length": 1021.6000366210938, "completions/min_length": 792.0, "completions/min_terminated_length": 792.0, "epoch": 0.9773954790958191, "frac_reward_zero_std": 0.0, "grad_norm": 3.1705454756620868, "kl": 0.015899658203125, "learning_rate": 1.0142505172225526e-07, "loss": -0.0674, "num_tokens": 214048616.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8651255965232849, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19881239801070083, "rewards/wordcountpos_reward/raw_geo/std": 0.12732408718306498, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07503085784948503, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1302.0, "completions/mean_length": 1102.125, "completions/mean_terminated_length": 1075.60009765625, "completions/min_length": 879.0, "completions/min_terminated_length": 879.0, "epoch": 0.9775955191038208, "frac_reward_zero_std": 0.0, "grad_norm": 3.584899563210217, "kl": 0.01806640625, "learning_rate": 1.0140017338423605e-07, "loss": -0.017, "num_tokens": 214091450.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6744341254234314, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06519485685372257, "rewards/wordcountpos_reward/raw_geo/std": 0.1709193849309146, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1112221667221529, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1293.9375, "completions/mean_terminated_length": 1246.3846435546875, "completions/min_length": 1043.0, "completions/min_terminated_length": 1043.0, "epoch": 0.9777955591118224, "frac_reward_zero_std": 0.0, "grad_norm": 2.7101033162551897, "kl": 0.0158843994140625, "learning_rate": 1.0137551378552938e-07, "loss": 0.0189, "num_tokens": 214142353.0, "reward": 0.0, "reward_std": 0.8336164355278015, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.06647031572668918, "rewards/wordcountpos_reward/raw_geo/std": 0.13201501364910148, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.09388724521901158, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1402.0, "completions/mean_length": 1070.5, "completions/mean_terminated_length": 927.3333740234375, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.9779955991198239, "frac_reward_zero_std": 0.0, "grad_norm": 2.539918027223357, "kl": 0.0115966796875, "learning_rate": 1.0135107293815932e-07, "loss": -0.1041, "num_tokens": 214185081.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8237836360931396, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05762523883244149, "rewards/wordcountpos_reward/raw_geo/std": 0.05706054695402342, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1473.0, "completions/mean_length": 1266.625, "completions/mean_terminated_length": 1212.769287109375, "completions/min_length": 1042.0, "completions/min_terminated_length": 1042.0, "epoch": 0.9781956391278256, "frac_reward_zero_std": 0.0, "grad_norm": 2.7671034516491244, "kl": 0.014801025390625, "learning_rate": 1.0132685085404337e-07, "loss": -0.0031, "num_tokens": 214235443.0, "reward": -1.1175870895385742e-08, "reward_std": 1.0181972980499268, "rewards/wordcountpos_reward/mean": -1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.025641608714714482, "rewards/wordcountpos_reward/raw_geo/std": 0.035755265825035736, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.12102953419784838, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 1333.375, "completions/mean_terminated_length": 1322.2667236328125, "completions/min_length": 1212.0, "completions/min_terminated_length": 1212.0, "epoch": 0.9783956791358271, "frac_reward_zero_std": 0.0, "grad_norm": 2.4538090458952544, "kl": 0.009246826171875, "learning_rate": 1.0130284754499233e-07, "loss": -0.0148, "num_tokens": 214276777.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8241698741912842, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19657345412386015, "rewards/wordcountpos_reward/raw_geo/std": 0.13749412679710846, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1199.0, "completions/mean_terminated_length": 1156.0, "completions/min_length": 997.0, "completions/min_terminated_length": 997.0, "epoch": 0.9785957191438288, "frac_reward_zero_std": 0.0, "grad_norm": 2.8715186236863293, "kl": 0.0145263671875, "learning_rate": 1.0127906302271034e-07, "loss": -0.0495, "num_tokens": 214326881.0, "reward": 0.0, "reward_std": 0.4731966257095337, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.2903746899107309, "rewards/wordcountpos_reward/raw_geo/std": 0.3294738095229966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.14900907255500823, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 1091.8125, "completions/mean_terminated_length": 1091.8125, "completions/min_length": 829.0, "completions/min_terminated_length": 829.0, "epoch": 0.9787957591518304, "frac_reward_zero_std": 0.0, "grad_norm": 3.3340084848065774, "kl": 0.01763916015625, "learning_rate": 1.0125549729879478e-07, "loss": 0.0091, "num_tokens": 214375134.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6901153326034546, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11451065374200535, "rewards/wordcountpos_reward/raw_geo/std": 0.14749471851710508, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026002, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1364.0, "completions/max_terminated_length": 1364.0, "completions/mean_length": 1145.3125, "completions/mean_terminated_length": 1145.3125, "completions/min_length": 848.0, "completions/min_terminated_length": 848.0, "epoch": 0.9789957991598319, "frac_reward_zero_std": 0.0, "grad_norm": 3.11261801577571, "kl": 0.017364501953125, "learning_rate": 1.0123215038473644e-07, "loss": -0.0397, "num_tokens": 214419875.0, "reward": 0.0, "reward_std": 0.8528201580047607, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.11581913558047584, "rewards/wordcountpos_reward/raw_geo/std": 0.0493980668463826, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8333333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.1192569587999888, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1233.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 1058.1875, "completions/mean_terminated_length": 1058.1875, "completions/min_length": 827.0, "completions/min_terminated_length": 827.0, "epoch": 0.9791958391678336, "frac_reward_zero_std": 0.0, "grad_norm": 3.693297401590695, "kl": 0.018768310546875, "learning_rate": 1.0120902229191944e-07, "loss": 0.0099, "num_tokens": 214464726.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9523783326148987, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.003911545949854236, "rewards/wordcountpos_reward/raw_geo/std": 0.10742072605483621, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.7916666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.11385500851066223, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1481.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1027.875, "completions/mean_terminated_length": 1027.875, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.9793958791758351, "frac_reward_zero_std": 0.0, "grad_norm": 3.452564930139897, "kl": 0.014129638671875, "learning_rate": 1.0118611303162104e-07, "loss": -0.0497, "num_tokens": 214506948.0, "reward": -4.470348358154297e-08, "reward_std": 1.0670479536056519, "rewards/wordcountpos_reward/mean": -4.470348358154297e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.004339342582384727, "rewards/wordcountpos_reward/raw_geo/std": 0.08642616327156508, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1253.625, "completions/mean_terminated_length": 1062.0, "completions/min_length": 699.0, "completions/min_terminated_length": 699.0, "epoch": 0.9795959191838368, "frac_reward_zero_std": 0.0, "grad_norm": 3.4932985692439953, "kl": 0.0189208984375, "learning_rate": 1.0116342261501196e-07, "loss": -0.0474, "num_tokens": 214549902.0, "reward": 7.450580596923828e-09, "reward_std": 1.0324244499206543, "rewards/wordcountpos_reward/mean": 7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.024440702813432198, "rewards/wordcountpos_reward/raw_geo/std": 0.08508119006714462, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.11800816042090448, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1301.0, "completions/max_terminated_length": 1301.0, "completions/mean_length": 1070.9375, "completions/mean_terminated_length": 1070.9375, "completions/min_length": 783.0, "completions/min_terminated_length": 783.0, "epoch": 0.9797959591918384, "frac_reward_zero_std": 0.0, "grad_norm": 3.3046385136150667, "kl": 0.016632080078125, "learning_rate": 1.0114095105315611e-07, "loss": -0.0396, "num_tokens": 214583365.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7479479312896729, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.006349110561140407, "rewards/wordcountpos_reward/raw_geo/std": 0.0659964779806192, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.09098229375970787, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 1263.3125, "completions/mean_terminated_length": 1184.416748046875, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.9799959991998399, "frac_reward_zero_std": 0.0, "grad_norm": 3.503835717195483, "kl": 0.01904296875, "learning_rate": 1.0111869835701079e-07, "loss": -0.0418, "num_tokens": 214629122.0, "reward": 0.0, "reward_std": 0.42662423849105835, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.25430400684613214, "rewards/wordcountpos_reward/raw_geo/std": 0.32961326095378546, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05947299418254506, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1137.0, "completions/max_terminated_length": 1137.0, "completions/mean_length": 965.375, "completions/mean_terminated_length": 965.375, "completions/min_length": 853.0, "completions/min_terminated_length": 853.0, "epoch": 0.9801960392078416, "frac_reward_zero_std": 0.0, "grad_norm": 3.8290439374802294, "kl": 0.021331787109375, "learning_rate": 1.0109666453742648e-07, "loss": 0.016, "num_tokens": 214660664.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9434012174606323, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.01865431743502511, "rewards/wordcountpos_reward/raw_geo/std": 0.08484657657330628, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10000000000000002, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 1067.5, "completions/mean_terminated_length": 1067.5, "completions/min_length": 743.0, "completions/min_terminated_length": 743.0, "epoch": 0.9803960792158432, "frac_reward_zero_std": 0.0, "grad_norm": 3.4329561149834555, "kl": 0.01953125, "learning_rate": 1.0107484960514692e-07, "loss": -0.0413, "num_tokens": 214709608.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7271832227706909, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1269300360529899, "rewards/wordcountpos_reward/raw_geo/std": 0.14638727594470194, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09727776191382574, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1206.0, "completions/max_terminated_length": 1206.0, "completions/mean_length": 1101.375, "completions/mean_terminated_length": 1101.375, "completions/min_length": 950.0, "completions/min_terminated_length": 950.0, "epoch": 0.9805961192238448, "frac_reward_zero_std": 0.0, "grad_norm": 2.7285913083506754, "kl": 0.014068603515625, "learning_rate": 1.0105325357080925e-07, "loss": 0.0075, "num_tokens": 214750206.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6810318231582642, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07471795752016316, "rewards/wordcountpos_reward/raw_geo/std": 0.0809018131882831, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237134, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1426.0, "completions/mean_length": 1109.4375, "completions/mean_terminated_length": 1053.6429443359375, "completions/min_length": 632.0, "completions/min_terminated_length": 632.0, "epoch": 0.9807961592318464, "frac_reward_zero_std": 0.0, "grad_norm": 3.701621899342914, "kl": 0.019683837890625, "learning_rate": 1.0103187644494377e-07, "loss": 0.046, "num_tokens": 214804277.0, "reward": -3.725290298461914e-08, "reward_std": 1.037473440170288, "rewards/wordcountpos_reward/mean": -3.725290298461914e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06687172498963391, "rewards/wordcountpos_reward/raw_geo/std": 0.06253120108638746, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.7999999999999999, "rewards/wordcountpos_reward/raw_rule/std": 0.12171612389003694, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 1014.75, "completions/mean_terminated_length": 1014.75, "completions/min_length": 769.0, "completions/min_terminated_length": 769.0, "epoch": 0.9809961992398479, "frac_reward_zero_std": 0.0, "grad_norm": 3.3049683974229187, "kl": 0.02081298828125, "learning_rate": 1.0101071823797407e-07, "loss": 0.0212, "num_tokens": 214844057.0, "reward": -5.960464477539063e-08, "reward_std": 0.553397536277771, "rewards/wordcountpos_reward/mean": -5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0017221308338393393, "rewards/wordcountpos_reward/raw_geo/std": 0.09744676581123485, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.07649739768026001, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1221.0, "completions/max_terminated_length": 1221.0, "completions/mean_length": 985.25, "completions/mean_terminated_length": 985.25, "completions/min_length": 830.0, "completions/min_terminated_length": 830.0, "epoch": 0.9811962392478496, "frac_reward_zero_std": 0.0, "grad_norm": 4.05575131090494, "kl": 0.022247314453125, "learning_rate": 1.0098977896021697e-07, "loss": -0.0291, "num_tokens": 214894941.0, "reward": 0.0, "reward_std": 1.0030176639556885, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14465677100328653, "rewards/wordcountpos_reward/raw_geo/std": 0.12682100689420547, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.054262735320332364, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 1183.5625, "completions/mean_terminated_length": 1162.4666748046875, "completions/min_length": 854.0, "completions/min_terminated_length": 854.0, "epoch": 0.9813962792558512, "frac_reward_zero_std": 0.0, "grad_norm": 3.3548513723310105, "kl": 0.0183563232421875, "learning_rate": 1.009690586218825e-07, "loss": 0.0349, "num_tokens": 214933750.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6484261155128479, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.1656580252612088, "rewards/wordcountpos_reward/raw_geo/std": 0.2112716232262787, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 1161.875, "completions/mean_terminated_length": 1083.84619140625, "completions/min_length": 941.0, "completions/min_terminated_length": 941.0, "epoch": 0.9815963192638528, "frac_reward_zero_std": 0.0, "grad_norm": 2.7764463734053404, "kl": 0.0145721435546875, "learning_rate": 1.0094855723307415e-07, "loss": 0.0322, "num_tokens": 214986948.0, "reward": 0.0, "reward_std": 0.7291247844696045, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1341377189544377, "rewards/wordcountpos_reward/raw_geo/std": 0.09847264621789413, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0877707451472511, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1288.0, "completions/max_terminated_length": 1288.0, "completions/mean_length": 1063.125, "completions/mean_terminated_length": 1063.125, "completions/min_length": 753.0, "completions/min_terminated_length": 753.0, "epoch": 0.9817963592718544, "frac_reward_zero_std": 0.0, "grad_norm": 3.5369965663241105, "kl": 0.01763916015625, "learning_rate": 1.009282748037883e-07, "loss": -0.0177, "num_tokens": 215018262.0, "reward": 0.0, "reward_std": 0.8860327005386353, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.169619624154854, "rewards/wordcountpos_reward/raw_geo/std": 0.12529213991798083, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11213417888437974, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 971.4375, "completions/mean_terminated_length": 936.2000732421875, "completions/min_length": 698.0, "completions/min_terminated_length": 698.0, "epoch": 0.9819963992798559, "frac_reward_zero_std": 0.0, "grad_norm": 3.587131511943673, "kl": 0.0173492431640625, "learning_rate": 1.0090821134391492e-07, "loss": 0.0691, "num_tokens": 215061141.0, "reward": 2.9802322387695312e-08, "reward_std": 0.7243823409080505, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.2816711285282224, "rewards/wordcountpos_reward/raw_geo/std": 0.23294017137098436, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1308094458023239, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1061.0, "completions/max_terminated_length": 1061.0, "completions/mean_length": 886.875, "completions/mean_terminated_length": 886.875, "completions/min_length": 790.0, "completions/min_terminated_length": 790.0, "epoch": 0.9821964392878576, "frac_reward_zero_std": 0.0, "grad_norm": 3.0562629759233144, "kl": 0.0111846923828125, "learning_rate": 1.0088836686323691e-07, "loss": 0.008, "num_tokens": 215099707.0, "reward": 0.0, "reward_std": 0.9146708250045776, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1276339127447461, "rewards/wordcountpos_reward/raw_geo/std": 0.1462774315603887, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06885303726590965, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1221.875, "completions/mean_terminated_length": 1221.875, "completions/min_length": 949.0, "completions/min_terminated_length": 949.0, "epoch": 0.9823964792958592, "frac_reward_zero_std": 0.0, "grad_norm": 3.0194906275379583, "kl": 0.018157958984375, "learning_rate": 1.0086874137143065e-07, "loss": -0.0407, "num_tokens": 215140449.0, "reward": 0.0, "reward_std": 0.8429617881774902, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2526731957021779, "rewards/wordcountpos_reward/raw_geo/std": 0.2642087058316508, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1083.5625, "completions/mean_terminated_length": 1083.5625, "completions/min_length": 751.0, "completions/min_terminated_length": 751.0, "epoch": 0.9825965193038608, "frac_reward_zero_std": 0.0, "grad_norm": 2.6172601895447807, "kl": 0.0169525146484375, "learning_rate": 1.0084933487806555e-07, "loss": 0.0186, "num_tokens": 215190826.0, "reward": -2.9802322387695312e-08, "reward_std": 0.9877358675003052, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.03787765231882526, "rewards/wordcountpos_reward/raw_geo/std": 0.18574013863187266, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8625, "rewards/wordcountpos_reward/raw_rule/std": 0.0749073501808141, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.5, "completions/max_length": 1500.0, "completions/max_terminated_length": 1350.0, "completions/mean_length": 1350.9375, "completions/mean_terminated_length": 1201.875, "completions/min_length": 833.0, "completions/min_terminated_length": 833.0, "epoch": 0.9827965593118624, "frac_reward_zero_std": 0.0, "grad_norm": 3.4571383986929507, "kl": 0.01727294921875, "learning_rate": 1.0083014739260426e-07, "loss": -0.0061, "num_tokens": 215238353.0, "reward": -2.9802322387695312e-08, "reward_std": 0.22031590342521667, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.19128390038131474, "rewards/wordcountpos_reward/raw_geo/std": 0.19815638475078662, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9166666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10470416879457554, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 1210.0625, "completions/mean_terminated_length": 1113.416748046875, "completions/min_length": 953.0, "completions/min_terminated_length": 953.0, "epoch": 0.9829965993198639, "frac_reward_zero_std": 0.0, "grad_norm": 2.4874707374569294, "kl": 0.0118408203125, "learning_rate": 1.0081117892440282e-07, "loss": 0.0087, "num_tokens": 215292874.0, "reward": 0.0, "reward_std": 0.8636307716369629, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.1147331451973333, "rewards/wordcountpos_reward/raw_geo/std": 0.15539752769344928, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.11917929226045816, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 1194.625, "completions/mean_terminated_length": 1194.625, "completions/min_length": 877.0, "completions/min_terminated_length": 877.0, "epoch": 0.9831966393278656, "frac_reward_zero_std": 0.0, "grad_norm": 2.6061108026227253, "kl": 0.00977325439453125, "learning_rate": 1.007924294827102e-07, "loss": 0.018, "num_tokens": 215328548.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9416689872741699, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1914422130788284, "rewards/wordcountpos_reward/raw_geo/std": 0.22348352011051958, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 948.5625, "completions/mean_terminated_length": 948.5625, "completions/min_length": 730.0, "completions/min_terminated_length": 730.0, "epoch": 0.9833966793358672, "frac_reward_zero_std": 0.0, "grad_norm": 3.7500276994447317, "kl": 0.018951416015625, "learning_rate": 1.0077389907666878e-07, "loss": -0.0161, "num_tokens": 215369773.0, "reward": 0.0, "reward_std": 0.738404393196106, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.3122312879952716, "rewards/wordcountpos_reward/raw_geo/std": 0.15158316750231376, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1259.0, "completions/max_terminated_length": 1259.0, "completions/mean_length": 1061.0625, "completions/mean_terminated_length": 1061.0625, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.9835967193438687, "frac_reward_zero_std": 0.0, "grad_norm": 3.064066177452661, "kl": 0.0184326171875, "learning_rate": 1.0075558771531406e-07, "loss": 0.001, "num_tokens": 215409782.0, "reward": 0.0, "reward_std": 0.8268251419067383, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.08757050818224221, "rewards/wordcountpos_reward/raw_geo/std": 0.09689980063441422, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1355.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 1036.1875, "completions/mean_terminated_length": 1036.1875, "completions/min_length": 863.0, "completions/min_terminated_length": 863.0, "epoch": 0.9837967593518704, "frac_reward_zero_std": 0.0, "grad_norm": 2.5979246830859677, "kl": 0.010284423828125, "learning_rate": 1.0073749540757474e-07, "loss": -0.005, "num_tokens": 215449361.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0305869579315186, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.08975270608602334, "rewards/wordcountpos_reward/raw_geo/std": 0.21052752152445478, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0683130051063973, "rewards/wordcountpos_reward/std": 1.0327954292297363, "step": 4918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1500.0, "completions/mean_length": 1132.125, "completions/mean_terminated_length": 1079.571533203125, "completions/min_length": 733.0, "completions/min_terminated_length": 733.0, "epoch": 0.9839967993598719, "frac_reward_zero_std": 0.0, "grad_norm": 3.398524134607184, "kl": 0.0203857421875, "learning_rate": 1.0071962216227277e-07, "loss": -0.0259, "num_tokens": 215491571.0, "reward": -2.9802322387695312e-08, "reward_std": 1.0603892803192139, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.17122602000887788, "rewards/wordcountpos_reward/raw_geo/std": 0.165417416928883, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08933913745655642, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1164.0, "completions/max_terminated_length": 1164.0, "completions/mean_length": 928.875, "completions/mean_terminated_length": 928.875, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.9841968393678736, "frac_reward_zero_std": 0.0, "grad_norm": 2.6996314294967725, "kl": 0.01031494140625, "learning_rate": 1.0070196798812312e-07, "loss": 0.0091, "num_tokens": 215525681.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6051958799362183, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.050699322489796014, "rewards/wordcountpos_reward/raw_geo/std": 0.07458777022116438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1109.5625, "completions/mean_terminated_length": 932.0909423828125, "completions/min_length": 412.0, "completions/min_terminated_length": 412.0, "epoch": 0.9843968793758752, "frac_reward_zero_std": 0.0, "grad_norm": 2.4360831141381403, "kl": 0.0125579833984375, "learning_rate": 1.0068453289373414e-07, "loss": -0.0679, "num_tokens": 215559810.0, "reward": 0.0, "reward_std": 0.719819188117981, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.05000440749507426, "rewards/wordcountpos_reward/raw_geo/std": 0.3205910827065745, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.825, "rewards/wordcountpos_reward/raw_rule/std": 0.1517795672580372, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 1262.5625, "completions/mean_terminated_length": 1183.416748046875, "completions/min_length": 823.0, "completions/min_terminated_length": 823.0, "epoch": 0.9845969193838767, "frac_reward_zero_std": 0.0, "grad_norm": 3.118571448557724, "kl": 0.021728515625, "learning_rate": 1.0066731688760718e-07, "loss": -0.0075, "num_tokens": 215609915.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0127660036087036, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04911456067780599, "rewards/wordcountpos_reward/raw_geo/std": 0.1532703485191732, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1190.25, "completions/mean_terminated_length": 1169.60009765625, "completions/min_length": 1068.0, "completions/min_terminated_length": 1068.0, "epoch": 0.9847969593918784, "frac_reward_zero_std": 0.0, "grad_norm": 2.7668247347265935, "kl": 0.01258087158203125, "learning_rate": 1.006503199781369e-07, "loss": 0.0166, "num_tokens": 215659455.0, "reward": 0.0, "reward_std": 0.7089526057243347, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.03697079107847459, "rewards/wordcountpos_reward/raw_geo/std": 0.07459843359785423, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 1127.375, "completions/mean_terminated_length": 1127.375, "completions/min_length": 972.0, "completions/min_terminated_length": 972.0, "epoch": 0.98499699939988, "frac_reward_zero_std": 0.0, "grad_norm": 2.860018200357969, "kl": 0.0142364501953125, "learning_rate": 1.00633542173611e-07, "loss": 0.0172, "num_tokens": 215695093.0, "reward": 0.0, "reward_std": 0.7887802124023438, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.011504292269169753, "rewards/wordcountpos_reward/raw_geo/std": 0.05946823338094017, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 1238.0625, "completions/mean_terminated_length": 1220.60009765625, "completions/min_length": 1108.0, "completions/min_terminated_length": 1108.0, "epoch": 0.9851970394078816, "frac_reward_zero_std": 0.0, "grad_norm": 3.191584746246887, "kl": 0.016632080078125, "learning_rate": 1.0061698348221056e-07, "loss": 0.0288, "num_tokens": 215747654.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8410070538520813, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.262551310077561, "rewards/wordcountpos_reward/raw_geo/std": 0.11675795652913383, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 1166.1875, "completions/mean_terminated_length": 1143.933349609375, "completions/min_length": 785.0, "completions/min_terminated_length": 785.0, "epoch": 0.9853970794158832, "frac_reward_zero_std": 0.0, "grad_norm": 3.306803744313914, "kl": 0.019317626953125, "learning_rate": 1.006006439120095e-07, "loss": -0.0627, "num_tokens": 215786689.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9384487867355347, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.016907082462849297, "rewards/wordcountpos_reward/raw_geo/std": 0.09814340340007546, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10602235962635778, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 1257.0625, "completions/mean_terminated_length": 1222.357177734375, "completions/min_length": 940.0, "completions/min_terminated_length": 940.0, "epoch": 0.9855971194238847, "frac_reward_zero_std": 0.0, "grad_norm": 2.228705210242911, "kl": 0.0117950439453125, "learning_rate": 1.0058452347097519e-07, "loss": 0.0062, "num_tokens": 215829882.0, "reward": 0.0, "reward_std": 0.7299556732177734, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0707165627458825, "rewards/wordcountpos_reward/raw_geo/std": 0.08593261660155707, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.04849589520621153, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1295.0, "completions/max_terminated_length": 1295.0, "completions/mean_length": 974.5, "completions/mean_terminated_length": 974.5, "completions/min_length": 671.0, "completions/min_terminated_length": 671.0, "epoch": 0.9857971594318864, "frac_reward_zero_std": 0.0, "grad_norm": 3.133894480553154, "kl": 0.018707275390625, "learning_rate": 1.0056862216696798e-07, "loss": -0.0219, "num_tokens": 215871474.0, "reward": 0.0, "reward_std": 0.6438214182853699, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.1440955496108402, "rewards/wordcountpos_reward/raw_geo/std": 0.16046066980537274, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.05443310539518172, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1327.0, "completions/max_terminated_length": 1327.0, "completions/mean_length": 1068.625, "completions/mean_terminated_length": 1068.625, "completions/min_length": 831.0, "completions/min_terminated_length": 831.0, "epoch": 0.985997199439888, "frac_reward_zero_std": 0.0, "grad_norm": 2.9805518142818794, "kl": 0.0157623291015625, "learning_rate": 1.0055294000774147e-07, "loss": 0.0078, "num_tokens": 215906044.0, "reward": 0.0, "reward_std": 0.7682349681854248, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.15860405361897437, "rewards/wordcountpos_reward/raw_geo/std": 0.07256866510826801, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 1154.25, "completions/mean_terminated_length": 1074.4615478515625, "completions/min_length": 826.0, "completions/min_terminated_length": 826.0, "epoch": 0.9861972394478896, "frac_reward_zero_std": 0.0, "grad_norm": 2.4729691859220724, "kl": 0.0102691650390625, "learning_rate": 1.0053747700094221e-07, "loss": 0.0191, "num_tokens": 215951056.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8089938163757324, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.035104766118801776, "rewards/wordcountpos_reward/raw_geo/std": 0.09642730091389311, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08734775114237132, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 1193.3125, "completions/mean_terminated_length": 1149.5, "completions/min_length": 849.0, "completions/min_terminated_length": 849.0, "epoch": 0.9863972794558912, "frac_reward_zero_std": 0.0, "grad_norm": 3.4682545945859533, "kl": 0.0203857421875, "learning_rate": 1.0052223315411024e-07, "loss": -0.0066, "num_tokens": 216001741.0, "reward": -2.2351741790771484e-08, "reward_std": 0.9429806470870972, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.023542006832399834, "rewards/wordcountpos_reward/raw_geo/std": 0.3029892373366919, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116196, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 1148.3125, "completions/mean_terminated_length": 1148.3125, "completions/min_length": 925.0, "completions/min_terminated_length": 925.0, "epoch": 0.9865973194638927, "frac_reward_zero_std": 0.0, "grad_norm": 3.186363442235038, "kl": 0.020172119140625, "learning_rate": 1.0050720847467835e-07, "loss": -0.0105, "num_tokens": 216046474.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9194058179855347, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.24535128566493936, "rewards/wordcountpos_reward/raw_geo/std": 0.10822687210084683, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.08432740427115679, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 1080.25, "completions/mean_terminated_length": 1080.25, "completions/min_length": 861.0, "completions/min_terminated_length": 861.0, "epoch": 0.9867973594718944, "frac_reward_zero_std": 0.0, "grad_norm": 3.089674333185969, "kl": 0.0142974853515625, "learning_rate": 1.0049240296997271e-07, "loss": 0.0506, "num_tokens": 216092086.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0020864009857178, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.036878956349368254, "rewards/wordcountpos_reward/raw_geo/std": 0.1067373331910328, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07252075054258099, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1365.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 1111.0, "completions/mean_terminated_length": 1111.0, "completions/min_length": 891.0, "completions/min_terminated_length": 891.0, "epoch": 0.986997399479896, "frac_reward_zero_std": 0.0, "grad_norm": 2.7311474412767374, "kl": 0.0130767822265625, "learning_rate": 1.0047781664721258e-07, "loss": 0.0337, "num_tokens": 216135638.0, "reward": 0.0, "reward_std": 0.6871848106384277, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.18424401426085146, "rewards/wordcountpos_reward/raw_geo/std": 0.06274513158008159, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1492.0, "completions/mean_length": 1263.5, "completions/mean_terminated_length": 1247.7333984375, "completions/min_length": 1010.0, "completions/min_terminated_length": 1010.0, "epoch": 0.9871974394878976, "frac_reward_zero_std": 0.0, "grad_norm": 2.8175092395641967, "kl": 0.0115509033203125, "learning_rate": 1.004634495135103e-07, "loss": -0.0335, "num_tokens": 216181702.0, "reward": 0.0, "reward_std": 1.005765438079834, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.028695759152388232, "rewards/wordcountpos_reward/raw_geo/std": 0.3868111435402115, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.060705726131767695, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 978.9375, "completions/mean_terminated_length": 978.9375, "completions/min_length": 781.0, "completions/min_terminated_length": 781.0, "epoch": 0.9873974794958992, "frac_reward_zero_std": 0.0, "grad_norm": 3.7608734787430578, "kl": 0.0185546875, "learning_rate": 1.0044930157587132e-07, "loss": -0.0055, "num_tokens": 216213461.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0020313262939453, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.030379221904537627, "rewards/wordcountpos_reward/raw_geo/std": 0.1771921495199709, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07781745019952502, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 1257.0625, "completions/mean_terminated_length": 1146.6363525390625, "completions/min_length": 911.0, "completions/min_terminated_length": 911.0, "epoch": 0.9875975195039007, "frac_reward_zero_std": 0.0, "grad_norm": 3.542766083514111, "kl": 0.02008056640625, "learning_rate": 1.0043537284119422e-07, "loss": 0.0255, "num_tokens": 216259342.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8083266019821167, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02514332870744921, "rewards/wordcountpos_reward/raw_geo/std": 0.06944244698205947, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.0625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.1134476547592341, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1381.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 955.75, "completions/mean_terminated_length": 955.75, "completions/min_length": 685.0, "completions/min_terminated_length": 685.0, "epoch": 0.9877975595119024, "frac_reward_zero_std": 0.0, "grad_norm": 2.363605189319584, "kl": 0.0088653564453125, "learning_rate": 1.0042166331627082e-07, "loss": 0.0197, "num_tokens": 216302266.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9970389604568481, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11398416031042057, "rewards/wordcountpos_reward/raw_geo/std": 0.18175207937299345, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.05821416398857659, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1295.0, "completions/max_terminated_length": 1295.0, "completions/mean_length": 1098.125, "completions/mean_terminated_length": 1098.125, "completions/min_length": 915.0, "completions/min_terminated_length": 915.0, "epoch": 0.987997599519904, "frac_reward_zero_std": 0.0, "grad_norm": 3.4086442823981304, "kl": 0.0166168212890625, "learning_rate": 1.0040817300778582e-07, "loss": 0.0265, "num_tokens": 216344652.0, "reward": 0.0, "reward_std": 0.7979227304458618, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.061664521107654545, "rewards/wordcountpos_reward/raw_geo/std": 0.07038826145164472, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8666666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.10327955589886446, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1201.9375, "completions/mean_terminated_length": 1102.5833740234375, "completions/min_length": 775.0, "completions/min_terminated_length": 775.0, "epoch": 0.9881976395279056, "frac_reward_zero_std": 0.0, "grad_norm": 3.2153892360749596, "kl": 0.016815185546875, "learning_rate": 1.0039490192231727e-07, "loss": -0.0247, "num_tokens": 216387635.0, "reward": -2.2351741790771484e-08, "reward_std": 1.0063207149505615, "rewards/wordcountpos_reward/mean": -2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00944361829293568, "rewards/wordcountpos_reward/raw_geo/std": 0.276562290439972, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.0938872452190116, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 1051.75, "completions/mean_terminated_length": 1051.75, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.9883976795359072, "frac_reward_zero_std": 0.0, "grad_norm": 1.896392798648157, "kl": 0.00855255126953125, "learning_rate": 1.0038185006633612e-07, "loss": -0.0442, "num_tokens": 216422679.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6284111738204956, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.012691345861374539, "rewards/wordcountpos_reward/raw_geo/std": 0.058478519855309316, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262934, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1432.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 1218.875, "completions/mean_terminated_length": 1218.875, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.9885977195439087, "frac_reward_zero_std": 0.0, "grad_norm": 2.9200339311210035, "kl": 0.01580810546875, "learning_rate": 1.0036901744620654e-07, "loss": -0.0259, "num_tokens": 216465397.0, "reward": 1.4901161193847656e-08, "reward_std": 0.929694414138794, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.07218786267573007, "rewards/wordcountpos_reward/raw_geo/std": 0.11927748330209213, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.875, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452246, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 1340.5, "completions/mean_terminated_length": 1287.3333740234375, "completions/min_length": 1137.0, "completions/min_terminated_length": 1137.0, "epoch": 0.9887977595519104, "frac_reward_zero_std": 0.0, "grad_norm": 3.2926279789770216, "kl": 0.02655029296875, "learning_rate": 1.0035640406818585e-07, "loss": -0.0269, "num_tokens": 216512493.0, "reward": 1.1175870895385742e-08, "reward_std": 1.0659737586975098, "rewards/wordcountpos_reward/mean": 1.1175870895385742e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.06606540924966364, "rewards/wordcountpos_reward/raw_geo/std": 0.08050927767074534, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.093392838174146, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1482.0, "completions/mean_length": 1260.5625, "completions/mean_terminated_length": 1244.60009765625, "completions/min_length": 780.0, "completions/min_terminated_length": 780.0, "epoch": 0.988997799559912, "frac_reward_zero_std": 0.0, "grad_norm": 3.0197833639845095, "kl": 0.0218505859375, "learning_rate": 1.0034400993842429e-07, "loss": -0.0832, "num_tokens": 216556126.0, "reward": 0.0, "reward_std": 0.8698749542236328, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.0864485336949979, "rewards/wordcountpos_reward/raw_geo/std": 0.05155960425263074, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1487.0, "completions/max_terminated_length": 1487.0, "completions/mean_length": 1077.6875, "completions/mean_terminated_length": 1077.6875, "completions/min_length": 873.0, "completions/min_terminated_length": 873.0, "epoch": 0.9891978395679136, "frac_reward_zero_std": 0.0, "grad_norm": 3.6047675111106865, "kl": 0.020477294921875, "learning_rate": 1.003318350629653e-07, "loss": -0.0031, "num_tokens": 216595569.0, "reward": 2.2351741790771484e-08, "reward_std": 1.006204605102539, "rewards/wordcountpos_reward/mean": 2.2351741790771484e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.15675822013960136, "rewards/wordcountpos_reward/raw_geo/std": 0.1007711991830681, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8541666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.10101338378503963, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1414.0, "completions/mean_length": 1071.625, "completions/mean_terminated_length": 1043.0667724609375, "completions/min_length": 820.0, "completions/min_terminated_length": 820.0, "epoch": 0.9893978795759152, "frac_reward_zero_std": 0.0, "grad_norm": 3.303012400389194, "kl": 0.01483154296875, "learning_rate": 1.0031987944774548e-07, "loss": -0.0327, "num_tokens": 216625435.0, "reward": 0.0, "reward_std": 0.9866205453872681, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.01768874795524968, "rewards/wordcountpos_reward/raw_geo/std": 0.08499548340140621, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9791666666666666, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1425.0, "completions/mean_length": 1232.625, "completions/mean_terminated_length": 1214.800048828125, "completions/min_length": 859.0, "completions/min_terminated_length": 859.0, "epoch": 0.9895979195839167, "frac_reward_zero_std": 0.0, "grad_norm": 3.596521247119827, "kl": 0.019500732421875, "learning_rate": 1.0030814309859433e-07, "loss": -0.0162, "num_tokens": 216680133.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7375806570053101, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.02568413226245108, "rewards/wordcountpos_reward/raw_geo/std": 0.20738304790395662, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8375, "rewards/wordcountpos_reward/raw_rule/std": 0.09418264367902598, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1381.0, "completions/mean_length": 1230.0, "completions/mean_terminated_length": 1191.4285888671875, "completions/min_length": 957.0, "completions/min_terminated_length": 957.0, "epoch": 0.9897979595919184, "frac_reward_zero_std": 0.0, "grad_norm": 3.410882931171157, "kl": 0.0163726806640625, "learning_rate": 1.0029662602123469e-07, "loss": 0.003, "num_tokens": 216723805.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8581136465072632, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.0701979762069232, "rewards/wordcountpos_reward/raw_geo/std": 0.06931158713351658, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8166666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1180081604209045, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1490.0, "completions/mean_length": 1394.375, "completions/mean_terminated_length": 1370.0, "completions/min_length": 1200.0, "completions/min_terminated_length": 1200.0, "epoch": 0.98999799959992, "frac_reward_zero_std": 0.0, "grad_norm": 2.4964195411579952, "kl": 0.015289306640625, "learning_rate": 1.0028532822128222e-07, "loss": -0.0258, "num_tokens": 216771139.0, "reward": -2.9802322387695312e-08, "reward_std": 0.8089931607246399, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.00428184844967764, "rewards/wordcountpos_reward/raw_geo/std": 0.24689276323204717, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.038248698840130005, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1173.0, "completions/max_terminated_length": 1173.0, "completions/mean_length": 977.5625, "completions/mean_terminated_length": 977.5625, "completions/min_length": 689.0, "completions/min_terminated_length": 689.0, "epoch": 0.9901980396079216, "frac_reward_zero_std": 0.0, "grad_norm": 2.192224603642146, "kl": 0.00853729248046875, "learning_rate": 1.0027424970424583e-07, "loss": -0.0491, "num_tokens": 216805076.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8566557168960571, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06169914604596967, "rewards/wordcountpos_reward/raw_geo/std": 0.03703226987071167, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.07187952884282608, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1143.125, "completions/mean_terminated_length": 1119.3333740234375, "completions/min_length": 903.0, "completions/min_terminated_length": 903.0, "epoch": 0.9903980796159232, "frac_reward_zero_std": 0.0, "grad_norm": 3.0345470967528123, "kl": 0.0160675048828125, "learning_rate": 1.0026339047552743e-07, "loss": -0.0643, "num_tokens": 216855454.0, "reward": 0.0, "reward_std": 0.9635026454925537, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08382330854366342, "rewards/wordcountpos_reward/raw_geo/std": 0.08495967702181653, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9125, "rewards/wordcountpos_reward/raw_rule/std": 0.052880017930181294, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1380.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 1100.0, "completions/mean_terminated_length": 1100.0, "completions/min_length": 798.0, "completions/min_terminated_length": 798.0, "epoch": 0.9905981196239247, "frac_reward_zero_std": 0.0, "grad_norm": 3.39020770627094, "kl": 0.017913818359375, "learning_rate": 1.0025275054042207e-07, "loss": -0.0038, "num_tokens": 216907294.0, "reward": 0.0, "reward_std": 0.7369526028633118, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05541446759400518, "rewards/wordcountpos_reward/raw_geo/std": 0.06648517404724152, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8875, "rewards/wordcountpos_reward/raw_rule/std": 0.08681611046941137, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1292.0, "completions/mean_length": 1234.5625, "completions/mean_terminated_length": 1113.9091796875, "completions/min_length": 953.0, "completions/min_terminated_length": 953.0, "epoch": 0.9907981596319264, "frac_reward_zero_std": 0.0, "grad_norm": 3.1649707493836834, "kl": 0.020751953125, "learning_rate": 1.0024232990411776e-07, "loss": -0.0256, "num_tokens": 216961463.0, "reward": -2.9802322387695312e-08, "reward_std": 0.76158607006073, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.16960989494483047, "rewards/wordcountpos_reward/raw_geo/std": 0.17785655175053533, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9583333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06831300510639733, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 1307.5, "completions/mean_terminated_length": 1157.77783203125, "completions/min_length": 860.0, "completions/min_terminated_length": 860.0, "epoch": 0.990998199639928, "frac_reward_zero_std": 0.0, "grad_norm": 2.6521516395626032, "kl": 0.0129241943359375, "learning_rate": 1.0023212857169576e-07, "loss": -0.0076, "num_tokens": 217010415.0, "reward": -2.9802322387695312e-08, "reward_std": 0.6332173347473145, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0032330880434548945, "rewards/wordcountpos_reward/raw_geo/std": 0.08566840163204605, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.045338235029118136, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 1084.3125, "completions/mean_terminated_length": 1084.3125, "completions/min_length": 886.0, "completions/min_terminated_length": 886.0, "epoch": 0.9911982396479296, "frac_reward_zero_std": 0.0, "grad_norm": 2.8190064024702437, "kl": 0.0127105712890625, "learning_rate": 1.0022214654813018e-07, "loss": -0.0229, "num_tokens": 217059836.0, "reward": 2.9802322387695312e-08, "reward_std": 0.9091815948486328, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.042685057460888735, "rewards/wordcountpos_reward/raw_geo/std": 0.11071386313721869, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09878896324620107, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1481.0, "completions/mean_length": 1320.625, "completions/mean_terminated_length": 1295.0, "completions/min_length": 1145.0, "completions/min_terminated_length": 1145.0, "epoch": 0.9913982796559312, "frac_reward_zero_std": 0.0, "grad_norm": 2.4163627431213004, "kl": 0.014312744140625, "learning_rate": 1.0021238383828834e-07, "loss": 0.0171, "num_tokens": 217107974.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9737574458122253, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.027643457205332254, "rewards/wordcountpos_reward/raw_geo/std": 0.2539740668677508, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.029814239699997188, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1476.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 1228.75, "completions/mean_terminated_length": 1228.75, "completions/min_length": 889.0, "completions/min_terminated_length": 889.0, "epoch": 0.9915983196639327, "frac_reward_zero_std": 0.0, "grad_norm": 2.749236594132949, "kl": 0.014862060546875, "learning_rate": 1.0020284044693057e-07, "loss": -0.0153, "num_tokens": 217163050.0, "reward": 0.0, "reward_std": 0.8842195272445679, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.06999845905494748, "rewards/wordcountpos_reward/raw_geo/std": 0.16109080226084663, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.1387777332977422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 1022.25, "completions/mean_terminated_length": 1022.25, "completions/min_length": 840.0, "completions/min_terminated_length": 840.0, "epoch": 0.9917983596719344, "frac_reward_zero_std": 0.0, "grad_norm": 3.74814191079442, "kl": 0.021087646484375, "learning_rate": 1.0019351637871036e-07, "loss": 0.0136, "num_tokens": 217212974.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0144426822662354, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.026948547209804625, "rewards/wordcountpos_reward/raw_geo/std": 0.061226776745579414, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.0958393717904348, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1458.0, "completions/max_terminated_length": 1458.0, "completions/mean_length": 1186.0625, "completions/mean_terminated_length": 1186.0625, "completions/min_length": 865.0, "completions/min_terminated_length": 865.0, "epoch": 0.991998399679936, "frac_reward_zero_std": 0.0, "grad_norm": 3.376493358148665, "kl": 0.017425537109375, "learning_rate": 1.0018441163817401e-07, "loss": -0.0296, "num_tokens": 217260159.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7278831601142883, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.04806666536782191, "rewards/wordcountpos_reward/raw_geo/std": 0.10767839449059999, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.08777074514725111, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 1116.3125, "completions/mean_terminated_length": 1116.3125, "completions/min_length": 778.0, "completions/min_terminated_length": 778.0, "epoch": 0.9921984396879376, "frac_reward_zero_std": 0.0, "grad_norm": 3.056856397723472, "kl": 0.01519775390625, "learning_rate": 1.0017552622976117e-07, "loss": 0.0229, "num_tokens": 217299444.0, "reward": 0.0, "reward_std": 0.7959402203559875, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22134584540773328, "rewards/wordcountpos_reward/raw_geo/std": 0.24428255057703271, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.047919685895217376, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 1121.25, "completions/mean_terminated_length": 1121.25, "completions/min_length": 866.0, "completions/min_terminated_length": 866.0, "epoch": 0.9923984796959392, "frac_reward_zero_std": 0.0, "grad_norm": 3.002675814904877, "kl": 0.0170745849609375, "learning_rate": 1.0016686015780431e-07, "loss": 0.0318, "num_tokens": 217346088.0, "reward": 0.0, "reward_std": 0.4973548948764801, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.02731224040838564, "rewards/wordcountpos_reward/raw_geo/std": 0.1690725943908762, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.09888264649460884, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 1044.875, "completions/mean_terminated_length": 1044.875, "completions/min_length": 816.0, "completions/min_terminated_length": 816.0, "epoch": 0.9925985197039408, "frac_reward_zero_std": 0.0, "grad_norm": 3.5132873319304716, "kl": 0.01947021484375, "learning_rate": 1.001584134265291e-07, "loss": -0.0089, "num_tokens": 217375438.0, "reward": 0.0, "reward_std": 0.7281904220581055, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.22015361173997822, "rewards/wordcountpos_reward/raw_geo/std": 0.06808889234456256, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.033333333333333326, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1388.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1107.8125, "completions/mean_terminated_length": 1107.8125, "completions/min_length": 917.0, "completions/min_terminated_length": 917.0, "epoch": 0.9927985597119424, "frac_reward_zero_std": 0.0, "grad_norm": 3.371956714403953, "kl": 0.017242431640625, "learning_rate": 1.001501860400542e-07, "loss": 0.0309, "num_tokens": 217414107.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8861281275749207, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.02853155712908965, "rewards/wordcountpos_reward/raw_geo/std": 0.08185906897293939, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 908.0, "completions/mean_length": 754.0625, "completions/mean_terminated_length": 704.3333740234375, "completions/min_length": 578.0, "completions/min_terminated_length": 578.0, "epoch": 0.992998599719944, "frac_reward_zero_std": 0.0, "grad_norm": 3.715775226846267, "kl": 0.0137786865234375, "learning_rate": 1.001421780023913e-07, "loss": 0.0902, "num_tokens": 217438228.0, "reward": 2.9802322387695312e-08, "reward_std": 0.38744717836380005, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.00706583761064171, "rewards/wordcountpos_reward/raw_geo/std": 0.07445845676403988, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8833333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.11287488977066928, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 1154.5, "completions/mean_terminated_length": 1131.4666748046875, "completions/min_length": 731.0, "completions/min_terminated_length": 731.0, "epoch": 0.9931986397279456, "frac_reward_zero_std": 0.0, "grad_norm": 3.5142250198796523, "kl": 0.016448974609375, "learning_rate": 1.0013438931744517e-07, "loss": 0.0075, "num_tokens": 217476268.0, "reward": -2.9802322387695312e-08, "reward_std": 0.46976637840270996, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.015575441595044934, "rewards/wordcountpos_reward/raw_geo/std": 0.04599231559369345, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.925, "rewards/wordcountpos_reward/raw_rule/std": 0.0906764700582363, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1395.0, "completions/mean_length": 1274.75, "completions/mean_terminated_length": 1172.3636474609375, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.9933986797359472, "frac_reward_zero_std": 0.0, "grad_norm": 2.3469478497401606, "kl": 0.01113128662109375, "learning_rate": 1.0012681998901363e-07, "loss": 0.0033, "num_tokens": 217522576.0, "reward": -2.9802322387695312e-08, "reward_std": 0.7551934719085693, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.04577734803802386, "rewards/wordcountpos_reward/raw_geo/std": 0.08651482581728843, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.95, "rewards/wordcountpos_reward/raw_rule/std": 0.0620632890834175, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1334.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 1160.75, "completions/mean_terminated_length": 1160.75, "completions/min_length": 980.0, "completions/min_terminated_length": 980.0, "epoch": 0.9935987197439488, "frac_reward_zero_std": 0.0, "grad_norm": 2.811571026134171, "kl": 0.013641357421875, "learning_rate": 1.0011947002078743e-07, "loss": -0.0049, "num_tokens": 217563372.0, "reward": 0.0, "reward_std": 0.9575103521347046, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.04648068950198495, "rewards/wordcountpos_reward/raw_geo/std": 0.2067544489385805, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09953596037316068, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1299.0, "completions/max_terminated_length": 1299.0, "completions/mean_length": 1144.875, "completions/mean_terminated_length": 1144.875, "completions/min_length": 984.0, "completions/min_terminated_length": 984.0, "epoch": 0.9937987597519504, "frac_reward_zero_std": 0.0, "grad_norm": 2.284619791627082, "kl": 0.0126190185546875, "learning_rate": 1.0011233941635055e-07, "loss": 0.0171, "num_tokens": 217611746.0, "reward": 1.4901161193847656e-08, "reward_std": 1.0507359504699707, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.19796314888670882, "rewards/wordcountpos_reward/raw_geo/std": 0.16175001543279438, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9833333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.03849001794597504, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.4375, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1400.6875, "completions/mean_terminated_length": 1323.4444580078125, "completions/min_length": 1027.0, "completions/min_terminated_length": 1027.0, "epoch": 0.993998799759952, "frac_reward_zero_std": 0.0, "grad_norm": 2.8883813183840457, "kl": 0.0147247314453125, "learning_rate": 1.0010542817917989e-07, "loss": -0.0217, "num_tokens": 217650613.0, "reward": 0.0, "reward_std": 0.8563724756240845, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.15741006911460712, "rewards/wordcountpos_reward/raw_geo/std": 0.17877797583660152, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 1227.5625, "completions/mean_terminated_length": 1188.6429443359375, "completions/min_length": 885.0, "completions/min_terminated_length": 885.0, "epoch": 0.9941988397679536, "frac_reward_zero_std": 0.0, "grad_norm": 2.7445912399965695, "kl": 0.0157623291015625, "learning_rate": 1.0009873631264534e-07, "loss": -0.0234, "num_tokens": 217698942.0, "reward": -7.450580596923828e-09, "reward_std": 1.0107598304748535, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.02797762243401393, "rewards/wordcountpos_reward/raw_geo/std": 0.06473491538859232, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.07490735018081408, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1407.0, "completions/max_terminated_length": 1407.0, "completions/mean_length": 1028.9375, "completions/mean_terminated_length": 1028.9375, "completions/min_length": 651.0, "completions/min_terminated_length": 651.0, "epoch": 0.9943988797759552, "frac_reward_zero_std": 0.0, "grad_norm": 3.4397609131141373, "kl": 0.019012451171875, "learning_rate": 1.0009226382000994e-07, "loss": -0.0244, "num_tokens": 217731373.0, "reward": 0.0, "reward_std": 0.8558807373046875, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07163465802973826, "rewards/wordcountpos_reward/raw_geo/std": 0.1735284827865472, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.0769800358919501, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 1270.125, "completions/mean_terminated_length": 1217.0770263671875, "completions/min_length": 1013.0, "completions/min_terminated_length": 1013.0, "epoch": 0.9945989197839568, "frac_reward_zero_std": 0.0, "grad_norm": 3.012847621457632, "kl": 0.018035888671875, "learning_rate": 1.000860107044297e-07, "loss": -0.0241, "num_tokens": 217775543.0, "reward": 0.0, "reward_std": 0.9186972379684448, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.3094382425355858, "rewards/wordcountpos_reward/raw_geo/std": 0.19185408043432323, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5163977794943223, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1495.0, "completions/max_terminated_length": 1495.0, "completions/mean_length": 1184.0625, "completions/mean_terminated_length": 1184.0625, "completions/min_length": 893.0, "completions/min_terminated_length": 893.0, "epoch": 0.9947989597919584, "frac_reward_zero_std": 0.0, "grad_norm": 3.119947274641433, "kl": 0.0168304443359375, "learning_rate": 1.0007997696895365e-07, "loss": -0.0254, "num_tokens": 217829888.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8115808963775635, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07739629022066555, "rewards/wordcountpos_reward/raw_geo/std": 0.11671416752295212, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 1177.0625, "completions/mean_terminated_length": 1102.5384521484375, "completions/min_length": 648.0, "completions/min_terminated_length": 648.0, "epoch": 0.99499899979996, "frac_reward_zero_std": 0.0, "grad_norm": 3.5598738054252266, "kl": 0.01959228515625, "learning_rate": 1.0007416261652388e-07, "loss": -0.0623, "num_tokens": 217869465.0, "reward": -3.725290298461914e-09, "reward_std": 1.0614707469940186, "rewards/wordcountpos_reward/mean": -3.725290298461914e-09, "rewards/wordcountpos_reward/raw_geo/mean": -0.036654573652425235, "rewards/wordcountpos_reward/raw_geo/std": 0.05923991495801602, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.12164002752505566, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1382.0, "completions/max_terminated_length": 1382.0, "completions/mean_length": 1132.75, "completions/mean_terminated_length": 1132.75, "completions/min_length": 962.0, "completions/min_terminated_length": 962.0, "epoch": 0.9951990398079615, "frac_reward_zero_std": 0.0, "grad_norm": 3.197707031143796, "kl": 0.0170135498046875, "learning_rate": 1.0006856764997547e-07, "loss": -0.0373, "num_tokens": 217904413.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9857175350189209, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.1134012675250075, "rewards/wordcountpos_reward/raw_geo/std": 0.09067672614671157, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9625, "rewards/wordcountpos_reward/raw_rule/std": 0.04194352464039305, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1370.0, "completions/mean_length": 1144.6875, "completions/mean_terminated_length": 1093.9285888671875, "completions/min_length": 921.0, "completions/min_terminated_length": 921.0, "epoch": 0.9953990798159632, "frac_reward_zero_std": 0.0, "grad_norm": 3.213270150435261, "kl": 0.01788330078125, "learning_rate": 1.0006319207203659e-07, "loss": -0.0312, "num_tokens": 217950232.0, "reward": 0.0, "reward_std": 0.9018977284431458, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03475630784032804, "rewards/wordcountpos_reward/raw_geo/std": 0.05527558507779324, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09651328828101764, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1369.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1066.25, "completions/mean_terminated_length": 1066.25, "completions/min_length": 904.0, "completions/min_terminated_length": 904.0, "epoch": 0.9955991198239648, "frac_reward_zero_std": 0.0, "grad_norm": 3.45402870524043, "kl": 0.0171966552734375, "learning_rate": 1.0005803588532834e-07, "loss": 0.0133, "num_tokens": 217990188.0, "reward": 5.960464477539063e-08, "reward_std": 0.6941643357276917, "rewards/wordcountpos_reward/mean": 5.960464477539063e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.07449422182258625, "rewards/wordcountpos_reward/raw_geo/std": 0.34257185488888103, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8250000000000001, "rewards/wordcountpos_reward/raw_rule/std": 0.0969917904124231, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 1102.3125, "completions/mean_terminated_length": 1102.3125, "completions/min_length": 992.0, "completions/min_terminated_length": 992.0, "epoch": 0.9957991598319664, "frac_reward_zero_std": 0.0, "grad_norm": 2.593421210453095, "kl": 0.011474609375, "learning_rate": 1.00053099092365e-07, "loss": 0.0234, "num_tokens": 218035833.0, "reward": -7.450580596923828e-09, "reward_std": 1.0489870309829712, "rewards/wordcountpos_reward/mean": -7.450580596923828e-09, "rewards/wordcountpos_reward/raw_geo/mean": 0.11436795680762239, "rewards/wordcountpos_reward/raw_geo/std": 0.05754250120399704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.04367387557118565, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1420.0, "completions/max_terminated_length": 1420.0, "completions/mean_length": 1043.625, "completions/mean_terminated_length": 1043.625, "completions/min_length": 530.0, "completions/min_terminated_length": 530.0, "epoch": 0.995999199839968, "frac_reward_zero_std": 0.0, "grad_norm": 3.468446102932914, "kl": 0.020233154296875, "learning_rate": 1.0004838169555368e-07, "loss": -0.0239, "num_tokens": 218079315.0, "reward": -1.4901161193847656e-08, "reward_std": 0.941865086555481, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.10998315027643109, "rewards/wordcountpos_reward/raw_geo/std": 0.03174655220756934, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.1600347184554374, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 1500.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1126.625, "completions/mean_terminated_length": 1040.4615478515625, "completions/min_length": 696.0, "completions/min_terminated_length": 696.0, "epoch": 0.9961992398479695, "frac_reward_zero_std": 0.0, "grad_norm": 3.754566251065979, "kl": 0.020782470703125, "learning_rate": 1.000438836971946e-07, "loss": -0.0113, "num_tokens": 218123205.0, "reward": 0.0, "reward_std": 0.7885147333145142, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.05857429626721456, "rewards/wordcountpos_reward/raw_geo/std": 0.14431351827502228, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9083333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.06382847385042252, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1375.0, "completions/max_terminated_length": 1375.0, "completions/mean_length": 1190.5, "completions/mean_terminated_length": 1190.5, "completions/min_length": 990.0, "completions/min_terminated_length": 990.0, "epoch": 0.9963992798559712, "frac_reward_zero_std": 0.0, "grad_norm": 2.9130601060338583, "kl": 0.0142669677734375, "learning_rate": 1.0003960509948108e-07, "loss": 0.0122, "num_tokens": 218173269.0, "reward": 2.9802322387695312e-08, "reward_std": 0.5526738166809082, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.22333241698885903, "rewards/wordcountpos_reward/raw_geo/std": 0.12045504543000236, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9041666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.08421753138505422, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.25, "completions/max_length": 1500.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 1348.875, "completions/mean_terminated_length": 1298.5, "completions/min_length": 976.0, "completions/min_terminated_length": 976.0, "epoch": 0.9965993198639728, "frac_reward_zero_std": 0.0, "grad_norm": 3.3172551834727724, "kl": 0.019927978515625, "learning_rate": 1.0003554590449928e-07, "loss": -0.0667, "num_tokens": 218223363.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0209709405899048, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06742883650668643, "rewards/wordcountpos_reward/raw_geo/std": 0.10536285719113507, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.25, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.8958333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.10027739304327549, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1388.0, "completions/mean_length": 1191.6875, "completions/mean_terminated_length": 1051.5455322265625, "completions/min_length": 851.0, "completions/min_terminated_length": 851.0, "epoch": 0.9967993598719744, "frac_reward_zero_std": 0.0, "grad_norm": 3.1353179531586806, "kl": 0.014251708984375, "learning_rate": 1.000317061142286e-07, "loss": 0.0241, "num_tokens": 218264966.0, "reward": 0.0, "reward_std": 0.8941762447357178, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.07992082159194062, "rewards/wordcountpos_reward/raw_geo/std": 0.08907662507314489, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.7416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.18993176162525865, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1352.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 1205.5625, "completions/mean_terminated_length": 1205.5625, "completions/min_length": 906.0, "completions/min_terminated_length": 906.0, "epoch": 0.996999399879976, "frac_reward_zero_std": 0.0, "grad_norm": 3.131278767839235, "kl": 0.0161285400390625, "learning_rate": 1.0002808573054125e-07, "loss": -0.0049, "num_tokens": 218311919.0, "reward": 2.9802322387695312e-08, "reward_std": 1.0231021642684937, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.033598372729116956, "rewards/wordcountpos_reward/raw_geo/std": 0.11215618224496966, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.8125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.046943622609505783, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1391.0, "completions/max_terminated_length": 1391.0, "completions/mean_length": 1085.0, "completions/mean_terminated_length": 1085.0, "completions/min_length": 766.0, "completions/min_terminated_length": 766.0, "epoch": 0.9971994398879775, "frac_reward_zero_std": 0.0, "grad_norm": 3.4013792574733976, "kl": 0.0189971923828125, "learning_rate": 1.0002468475520259e-07, "loss": -0.0075, "num_tokens": 218363703.0, "reward": 0.0, "reward_std": 0.9402371048927307, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.14726332549902718, "rewards/wordcountpos_reward/raw_geo/std": 0.20060314009240401, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9375, "rewards/wordcountpos_reward/raw_rule/std": 0.08595864638818419, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1197.0, "completions/max_terminated_length": 1197.0, "completions/mean_length": 963.25, "completions/mean_terminated_length": 963.25, "completions/min_length": 683.0, "completions/min_terminated_length": 683.0, "epoch": 0.9973994798959792, "frac_reward_zero_std": 0.0, "grad_norm": 2.217709256662434, "kl": 0.0116424560546875, "learning_rate": 1.0002150318987096e-07, "loss": 0.0041, "num_tokens": 218409043.0, "reward": 0.0, "reward_std": 0.8956988453865051, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.038301664150764476, "rewards/wordcountpos_reward/raw_geo/std": 0.2678700709047565, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.3415650255319866, "rewards/wordcountpos_reward/raw_rule/mean": 0.9416666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.059004080210452226, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1369.0, "completions/mean_length": 1205.1875, "completions/mean_terminated_length": 1163.071533203125, "completions/min_length": 934.0, "completions/min_terminated_length": 934.0, "epoch": 0.9975995199039808, "frac_reward_zero_std": 0.0, "grad_norm": 2.4692669349413183, "kl": 0.0096588134765625, "learning_rate": 1.0001854103609764e-07, "loss": -0.0165, "num_tokens": 218455294.0, "reward": 0.0, "reward_std": 0.6189040541648865, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.12533450390875578, "rewards/wordcountpos_reward/raw_geo/std": 0.1266922191606704, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 1112.75, "completions/mean_terminated_length": 1112.75, "completions/min_length": 923.0, "completions/min_terminated_length": 923.0, "epoch": 0.9977995599119824, "frac_reward_zero_std": 0.0, "grad_norm": 3.0670577955273526, "kl": 0.013031005859375, "learning_rate": 1.000157982953271e-07, "loss": -0.0147, "num_tokens": 218496498.0, "reward": 0.0, "reward_std": 0.5274605751037598, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": -0.04127423382094638, "rewards/wordcountpos_reward/raw_geo/std": 0.04697255734675012, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.5625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9333333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.09428090415820634, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 1171.875, "completions/mean_terminated_length": 1150.0, "completions/min_length": 981.0, "completions/min_terminated_length": 981.0, "epoch": 0.997999599919984, "frac_reward_zero_std": 0.0, "grad_norm": 3.684139520195726, "kl": 0.0230712890625, "learning_rate": 1.0001327496889658e-07, "loss": -0.0252, "num_tokens": 218547288.0, "reward": 0.0, "reward_std": 0.5816168785095215, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.03311968757591795, "rewards/wordcountpos_reward/raw_geo/std": 0.07206985256398914, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.1875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.40311288741492746, "rewards/wordcountpos_reward/raw_rule/mean": 0.8458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.10532137766186216, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1498.0, "completions/mean_length": 1230.4375, "completions/mean_terminated_length": 1191.9285888671875, "completions/min_length": 947.0, "completions/min_terminated_length": 947.0, "epoch": 0.9981996399279855, "frac_reward_zero_std": 0.0, "grad_norm": 2.655934664493357, "kl": 0.0128936767578125, "learning_rate": 1.0001097105803655e-07, "loss": -0.0193, "num_tokens": 218591015.0, "reward": 0.0, "reward_std": 0.9305930137634277, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.2235362184402607, "rewards/wordcountpos_reward/raw_geo/std": 0.18209797268195144, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.08606629658238706, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 1500.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 1296.5, "completions/mean_terminated_length": 1282.933349609375, "completions/min_length": 1056.0, "completions/min_terminated_length": 1056.0, "epoch": 0.9983996799359872, "frac_reward_zero_std": 0.0, "grad_norm": 3.2210116920906966, "kl": 0.0153656005859375, "learning_rate": 1.0000888656387042e-07, "loss": 0.0196, "num_tokens": 218637135.0, "reward": 1.4901161193847656e-08, "reward_std": 0.9453748464584351, "rewards/wordcountpos_reward/mean": 1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.11233479896746766, "rewards/wordcountpos_reward/raw_geo/std": 0.07588600120191225, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.3125, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8916666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.053748384988656986, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 1059.125, "completions/mean_terminated_length": 1059.125, "completions/min_length": 724.0, "completions/min_terminated_length": 724.0, "epoch": 0.9985997199439888, "frac_reward_zero_std": 0.0, "grad_norm": 2.8969028475996366, "kl": 0.015106201171875, "learning_rate": 1.0000702148741455e-07, "loss": -0.0449, "num_tokens": 218688137.0, "reward": 2.9802322387695312e-08, "reward_std": 0.8458592295646667, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.06404574307152536, "rewards/wordcountpos_reward/raw_geo/std": 0.06127713607669952, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.4375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.51234753829798, "rewards/wordcountpos_reward/raw_rule/mean": 0.9541666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.06309898162000302, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.3125, "completions/max_length": 1500.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 1158.25, "completions/mean_terminated_length": 1002.9091186523438, "completions/min_length": 822.0, "completions/min_terminated_length": 822.0, "epoch": 0.9987997599519904, "frac_reward_zero_std": 0.0, "grad_norm": 2.2722777829591867, "kl": 0.0091552734375, "learning_rate": 1.0000537582957834e-07, "loss": -0.0258, "num_tokens": 218734941.0, "reward": -1.4901161193847656e-08, "reward_std": 0.8356724977493286, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.349327890716866, "rewards/wordcountpos_reward/raw_geo/std": 0.19470083833780583, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.04999999999999999, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1239.0, "completions/max_terminated_length": 1239.0, "completions/mean_length": 1083.5625, "completions/mean_terminated_length": 1083.5625, "completions/min_length": 933.0, "completions/min_terminated_length": 933.0, "epoch": 0.998999799959992, "frac_reward_zero_std": 0.0, "grad_norm": 3.1994022272822993, "kl": 0.018951416015625, "learning_rate": 1.0000394959116428e-07, "loss": -0.0076, "num_tokens": 218782550.0, "reward": 0.0, "reward_std": 0.8615015745162964, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.08862211028222311, "rewards/wordcountpos_reward/raw_geo/std": 0.10120364985649741, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.75, "rewards/wordcountpos_reward/raw_keypoint/std": 0.4472135954999579, "rewards/wordcountpos_reward/raw_rule/mean": 0.85, "rewards/wordcountpos_reward/raw_rule/std": 0.10749676997731401, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1258.0, "completions/max_terminated_length": 1258.0, "completions/mean_length": 1177.625, "completions/mean_terminated_length": 1177.625, "completions/min_length": 1017.0, "completions/min_terminated_length": 1017.0, "epoch": 0.9991998399679936, "frac_reward_zero_std": 0.0, "grad_norm": 3.388430744343186, "kl": 0.01678466796875, "learning_rate": 1.0000274277286783e-07, "loss": -0.0152, "num_tokens": 218820848.0, "reward": -2.9802322387695312e-08, "reward_std": 0.5211089849472046, "rewards/wordcountpos_reward/mean": -2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.05007652389223351, "rewards/wordcountpos_reward/raw_geo/std": 0.1196836158138338, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.9, "rewards/wordcountpos_reward/raw_rule/std": 0.07698003589195009, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1242.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 984.1875, "completions/mean_terminated_length": 984.1875, "completions/min_length": 773.0, "completions/min_terminated_length": 773.0, "epoch": 0.9993998799759952, "frac_reward_zero_std": 0.0, "grad_norm": 2.9125474594057654, "kl": 0.017303466796875, "learning_rate": 1.0000175537527733e-07, "loss": -0.0355, "num_tokens": 218859307.0, "reward": -1.4901161193847656e-08, "reward_std": 0.9213511943817139, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.0900211023477261, "rewards/wordcountpos_reward/raw_geo/std": 0.1773635490715624, "rewards/wordcountpos_reward/raw_keypoint/mean": 1.0, "rewards/wordcountpos_reward/raw_keypoint/std": 0.0, "rewards/wordcountpos_reward/raw_rule/mean": 0.9291666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.070841502796867, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 1062.3125, "completions/mean_terminated_length": 1062.3125, "completions/min_length": 936.0, "completions/min_terminated_length": 936.0, "epoch": 0.9995999199839968, "frac_reward_zero_std": 0.0, "grad_norm": 2.2294359956356913, "kl": 0.0110015869140625, "learning_rate": 1.0000098739887436e-07, "loss": -0.005, "num_tokens": 218894720.0, "reward": 0.0, "reward_std": 0.766258955001831, "rewards/wordcountpos_reward/mean": 0.0, "rewards/wordcountpos_reward/raw_geo/mean": 0.11799204184715265, "rewards/wordcountpos_reward/raw_geo/std": 0.09335685969609943, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.6875, "rewards/wordcountpos_reward/raw_keypoint/std": 0.47871355387816905, "rewards/wordcountpos_reward/raw_rule/mean": 0.8791666666666667, "rewards/wordcountpos_reward/raw_rule/std": 0.09496588081262933, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 1209.6875, "completions/mean_terminated_length": 1209.6875, "completions/min_length": 955.0, "completions/min_terminated_length": 955.0, "epoch": 0.9997999599919984, "frac_reward_zero_std": 0.0, "grad_norm": 2.865486715665, "kl": 0.0106964111328125, "learning_rate": 1.0000043884403331e-07, "loss": 0.0359, "num_tokens": 218941283.0, "reward": -1.4901161193847656e-08, "reward_std": 1.0073068141937256, "rewards/wordcountpos_reward/mean": -1.4901161193847656e-08, "rewards/wordcountpos_reward/raw_geo/mean": 0.057671906967011605, "rewards/wordcountpos_reward/raw_geo/std": 0.07006493235696477, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.9375, "rewards/wordcountpos_reward/raw_keypoint/std": 0.25, "rewards/wordcountpos_reward/raw_rule/mean": 0.9458333333333333, "rewards/wordcountpos_reward/raw_rule/std": 0.055611083361076424, "rewards/wordcountpos_reward/std": 1.0327955484390259, "step": 4998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1313.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 1137.875, "completions/mean_terminated_length": 1137.875, "completions/min_length": 1008.0, "completions/min_terminated_length": 1008.0, "epoch": 1.0, "frac_reward_zero_std": 0.0, "grad_norm": 2.939332327095818, "kl": 0.018096923828125, "learning_rate": 1.0000010971102171e-07, "loss": 0.0119, "num_tokens": 218986545.0, "reward": 2.9802322387695312e-08, "reward_std": 0.6641827821731567, "rewards/wordcountpos_reward/mean": 2.9802322387695312e-08, "rewards/wordcountpos_reward/raw_geo/mean": -0.007647827933047427, "rewards/wordcountpos_reward/raw_geo/std": 0.07427455713661646, "rewards/wordcountpos_reward/raw_keypoint/mean": 0.625, "rewards/wordcountpos_reward/raw_keypoint/std": 0.5, "rewards/wordcountpos_reward/raw_rule/mean": 0.9208333333333334, "rewards/wordcountpos_reward/raw_rule/std": 0.06540472290116194, "rewards/wordcountpos_reward/std": 1.0327956676483154, "step": 4999 } ], "logging_steps": 1, "max_steps": 4999, "num_input_tokens_seen": 218986545, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }